In [1]:
# --- Block 1: Configuration (Long History) ---
%reset -f
import pandas as pd
import numpy as np
from datetime import datetime
import yfinance as yfn  
import scipy.optimize as sco
import matplotlib.patches as mpatches 
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
# --- BLOCK 1: MASTER DATA ACQUISITION (UPDATED DATES) ---
# 1. SET DYNAMIC DATES
START_DATE = "2020-01-01" 
END_DATE = datetime.today().strftime('%Y-%m-%d') # END DATE IS TODAY

# 2. ASSETS CONFIGURATION
RISK_FREE_SYMBOL = "^TNX"   #(CBOE 10-Year Treasury Note Yield)
FX_SYMBOL = "GBPUSD=X"    

print("="*60)
print(f"{'MASTER DATA PIPELINE: S&P 500 + MACRO':^60}")
print(f"{f'Timeline: {START_DATE} to {END_DATE}':^60}")
print("="*60)

def get_sp500_tickers():
    """Scrapes Wikipedia with timeout protection and fallback."""
    print("   > Connecting to Wikipedia...", end=" ", flush=True)
    try:
        tables = pd.read_html(
            "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies", 
            storage_options={'User-Agent': 'Mozilla/5.0'}
        )
        tickers = tables[0]["Symbol"].astype(str).str.replace(".", "-", regex=False).tolist()
        print(f"Success! Found {len(tickers)} tickers.")
        return tickers
    except Exception as e:
        print(f"\n   > ⚠️ Wikipedia Scrape Failed: {e}")
        print("   > Using Emergency Fallback List.")
        return ["AAPL", "MSFT", "GOOG", "AMZN", "NVDA", "TSLA", "META", "BRK-B", "JPM", "V", 
                "LLY", "AVGO", "PG", "MA", "HD", "CVX", "MRK", "ABBV", "PEP", "KO"]

def get_data(tickers, start, end):
    print(f"   > Initializing download for {len(tickers)} assets...", flush=True)

    # 1. ROBUST DOWNLOADER
    try:
        df = yfn.download(
            tickers, start=start, end=end, 
            progress=False,      # Clean output
            auto_adjust=True,    # Adjust for splits/dividends
            group_by="column", 
            threads=True,        # Speed up
            timeout=30           # Increased timeout for large history
        )
    except Exception as e:
        print(f"   > ❌ Critical Download Failure: {e}")
        return None, None, None

    # 2. DATA EXTRACTION
    if isinstance(df.columns, pd.MultiIndex):
        try:
            if "Close" in df.columns.get_level_values(0):
                 prices = df.xs("Close", level=0, axis=1)
            else:
                 prices = df.iloc[:, :len(tickers)]
        except:
             prices = df.iloc[:, :len(tickers)]
    else:
        prices = df

    # Remove Timezone & Align Index
    prices.index = pd.to_datetime(prices.index).tz_localize(None)
    business_days = pd.bdate_range(start=start, end=end)
    
    # 3. FILLING GAPS
    prices = prices.reindex(business_days).ffill(limit=3)
    
    # 4. FILTERING
    # Drop columns with >30% missing data immediately
    initial_count = prices.shape[1]
    prices = prices.dropna(axis=1, thresh=int(len(prices) * 0.7))
    dropped_count = initial_count - prices.shape[1]
    if dropped_count > 0:
        print(f"   > Dropped {dropped_count} assets (insufficient history since {start}).")

    # 5. MACRO DATA FETCH
    print("   > Fetching Macro Data (FX & Yields)...", flush=True)
    try:
        macro = yfn.download([FX_SYMBOL, RISK_FREE_SYMBOL], start=start, end=end, progress=False, auto_adjust=True)
        
        # Handle MultiIndex for Macro
        if isinstance(macro.columns, pd.MultiIndex):
             macro = macro.xs('Close', axis=1, level=0)
        else:
             if 'Close' in macro.columns:
                 macro = macro['Close']
        
        macro.index = pd.to_datetime(macro.index).tz_localize(None)
        macro = macro.reindex(business_days).ffill()
        
    except Exception as e:
        print(f"   > ❌ Macro Data Failed: {e}")
        return None, None, None

    # 6. ALIGNMENT & CONVERSION
    combined = prices.join(macro).dropna(subset=[FX_SYMBOL])
    
    # Convert Stocks to GBP
    prices_gbp = combined[prices.columns].div(combined[FX_SYMBOL], axis=0)

    # 7. RISK FREE RATE (Dynamic Calculation)
    raw_tnx_series = combined[RISK_FREE_SYMBOL]
    
    # Dynamic Math: If >10 (Index format), divide by 1000. If <10 (Percent format), divide by 100.
    rf_annual = np.where(raw_tnx_series > 10, raw_tnx_series / 1000.0, raw_tnx_series / 100.0)
    
    # Convert to Daily
    rf_daily_series = pd.Series(rf_annual / 252, index=combined.index)

    print(f"   > Done. Final Universe: {prices_gbp.shape[1]} assets.")
    return prices_gbp, rf_daily_series, combined[FX_SYMBOL], raw_tnx_series

# --- EXECUTION ---
sp500_tickers = get_sp500_tickers()
if sp500_tickers:
    universe = sp500_tickers + ["TLT", "BTC-USD"]
    
    # Unpack 4 values now
    prices_stocks, rf_daily, fx_rates, raw_tnx_series = get_data(universe, START_DATE, END_DATE)
    
    if prices_stocks is not None:
        # GLOBAL VARIABLE FIX:
        raw_tnx = raw_tnx_series 

        # VALIDATION CHECK
        last_rf = rf_daily.iloc[-1]
        last_rf_annual = last_rf * 252
        print("-" * 40)
        print(f"Sanity Check (Latest Date: {rf_daily.index[-1].date()}):")
        print(f"Risk Free Rate (Daily):   {last_rf:.6f}")
        print(f"Risk Free Rate (Annual):  {last_rf_annual:.2%}") 
        print(f"FX Rate (USD/GBP):        {fx_rates.iloc[-1]:.4f}")
        print("-" * 40)
        
        if last_rf_annual < 0.01:
            print("⚠️ WARNING: Risk Free Rate seems unusually low (<1%). Check ^TNX data.")
else:
    print("Error: Ticker list is empty.")
============================================================
           MASTER DATA PIPELINE: S&P 500 + MACRO            
             Timeline: 2020-01-01 to 2026-01-21             
============================================================
   > Connecting to Wikipedia... Success! Found 503 tickers.
   > Initializing download for 505 assets...
   > Dropped 8 assets (insufficient history since 2020-01-01).
   > Fetching Macro Data (FX & Yields)...
   > Done. Final Universe: 497 assets.
----------------------------------------
Sanity Check (Latest Date: 2026-01-21):
Risk Free Rate (Daily):   0.000170
Risk Free Rate (Annual):  4.30%
FX Rate (USD/GBP):        1.3418
----------------------------------------

According to the literature—and cited by Jegadeesh and Titman (1993)—the momentum effect represents perhaps the strongest evidence against the efficient markets hypothesis. The trend-following strategy, a type of momentum strategy, involves conducting a 1-year performance analysis of liquid stocks from developed countries, excluding the last month's performance to remove short-term noise. It then entails buying the top 10 performers and shorting the bottom 10 stocks.

Jegadeesh, N. and Titman, S. (2011). Momentum. SSRN Electronic Journal. doi:https://doi.org/10.2139/ssrn.1919226.

Following this therory, my question is why only the top 10 stock? After all a stock that is 20th in the list may have still have achieved a 80% annual performance and still be a good candidate for momentum trading. Consequently, based on this assumption, I've extended my stock pool to 30 stocks each (long and short candidate list).

In [3]:
# --- BLOCK 2: MOMENTUM SELECTION ENGINE (STRICT 252 - 21) ---
print("\n" + "="*60)
print(f"{'MOMENTUM SIGNAL GENERATION (12-1 MONTH STANDARD)':^60}")
print("="*60)

# 1. STRATEGY PARAMETERS
N_LONG = 60                 # Top 30 Winners
N_SHORT = 60                # Bottom 30 Losers

# STRICT DEFINITION: 
# We look back exactly 273 Days. 
# We ignore the last Month (21 Days) for the signal.
TOTAL_WINDOW = 273          
EXCLUDE_RECENT = 21         
FORMATION_WINDOW = TOTAL_WINDOW - EXCLUDE_RECENT # 252 Days

# 2. PREPARE DATA UNIVERSE
if 'prices_stocks' not in globals():
    print("❌ Critical Error: Data missing. Please run Block 1 first.")
else:
    prices_universe = prices_stocks
    print(f"Scanning {prices_universe.shape[1]} assets...")

    # 3. CALCULATE MOMENTUM
    # Log returns for mathematical correctness over long periods
    returns_universe = np.log(prices_universe / prices_universe.shift(1))
    
    # A. Get the full 273 trading days
    full_year_window = returns_universe.tail(TOTAL_WINDOW)
    
    # B. The Signal Window (Formation)
    # This takes the first 273 days of that year (dropping the last 21)
    formation_data = full_year_window.iloc[:FORMATION_WINDOW]
    
    # C. Calculate Total Return over the Formation Period
    # Summing log returns = Total Log Return
    period_log_return = formation_data.sum()
    
    # Convert back to simple % for readability: exp(log_ret) - 1
    period_simple_return = np.exp(period_log_return) - 1

    # 4. SELECT CANDIDATES
    # Winners: Highest returns in the Formation period
    long_candidates = period_simple_return.sort_values(ascending=False).head(N_LONG).index.tolist()

    # Losers: Lowest returns in the Formation period
    short_candidates = period_simple_return.sort_values(ascending=True).head(N_SHORT).index.tolist()

    # 5. OUTPUT REPORT
    latest_date = prices_universe.index[-1].date()
    start_date = full_year_window.index[0].date()
    signal_end_date = full_year_window.index[FORMATION_WINDOW].date()
    
    print("\n" + "-"*60)
    print(f"{'SELECTION REPORT':^60}")
    print("-" * 60)
    print(f"Total Analysis Window:   {TOTAL_WINDOW} days ({start_date} -> {latest_date})")
    print(f"Formation Period (Used): {FORMATION_WINDOW} days ({start_date} -> {signal_end_date})")
    print(f"Excluded Period (Skip):  {EXCLUDE_RECENT} days (Last month ignored)")
    print("-" * 60)

    # Print Winners
    print(f"\n[LONG CANDIDATES] Top {N_LONG} Winners:")
    print(f"{'Ticker':<10} | {'Formation Return':<18}")
    print("-" * 30)
    for t in long_candidates:
        print(f"{t:<10} | {period_simple_return[t]:.2%}")

    # Print Losers
    print(f"\n[SHORT CANDIDATES] Bottom {N_SHORT} Losers:")
    print(f"{'Ticker':<10} | {'Formation Return':<18}")
    print("-" * 30)
    for t in short_candidates:
        print(f"{t:<10} | {period_simple_return[t]:.2%}")
        
    print("\n✅ SELECTION COMPLETE.")
============================================================
      MOMENTUM SIGNAL GENERATION (12-1 MONTH STANDARD)      
============================================================
Scanning 497 assets...

------------------------------------------------------------
                      SELECTION REPORT                      
------------------------------------------------------------
Total Analysis Window:   273 days (2025-01-06 -> 2026-01-21)
Formation Period (Used): 252 days (2025-01-06 -> 2025-12-24)
Excluded Period (Skip):  21 days (Last month ignored)
------------------------------------------------------------

[LONG CANDIDATES] Top 60 Winners:
Ticker     | Formation Return  
------------------------------
WDC        | 239.60%
STX        | 196.44%
MU         | 183.35%
HOOD       | 167.26%
NEM        | 158.14%
WBD        | 153.47%
CVNA       | 126.90%
PLTR       | 123.34%
LRCX       | 116.30%
FIX        | 98.51%
APP        | 90.94%
APH        | 81.17%
TPR        | 81.05%
KLAC       | 79.00%
HII        | 77.30%
GLW        | 76.15%
HWM        | 71.27%
DG         | 69.49%
GE         | 69.41%
CVS        | 63.41%
INTC       | 62.50%
CAH        | 61.36%
C          | 58.85%
AMD        | 57.55%
ALB        | 56.77%
TEL        | 52.02%
GOOGL      | 51.23%
IDXX       | 50.92%
GOOG       | 50.82%
RTX        | 50.10%
DLTR       | 49.76%
NRG        | 49.54%
CAT        | 49.37%
GM         | 48.43%
CHRW       | 48.02%
HCA        | 47.95%
IVZ        | 46.48%
GS         | 45.77%
STLD       | 45.60%
JBL        | 44.61%
RL         | 43.61%
EXPE       | 43.51%
BK         | 43.15%
AMAT       | 42.05%
TKO        | 40.98%
MPWR       | 40.83%
JCI        | 40.44%
TER        | 40.30%
EL         | 39.52%
AVGO       | 39.31%
WYNN       | 39.18%
WELL       | 38.89%
FOXA       | 38.66%
COR        | 37.63%
HAS        | 36.99%
CMI        | 36.81%
MNST       | 36.26%
JNJ        | 35.18%
MS         | 34.67%
INCY       | 33.20%

[SHORT CANDIDATES] Bottom 60 Losers:
Ticker     | Formation Return  
------------------------------
TTD        | -71.76%
FISV       | -70.16%
DECK       | -55.35%
ARE        | -53.26%
IT         | -52.65%
MOH        | -48.81%
LULU       | -48.31%
CHTR       | -47.35%
FDS        | -43.07%
CLX        | -42.96%
DOW        | -42.59%
ZBRA       | -42.12%
GDDY       | -41.87%
CMG        | -41.82%
STZ        | -40.87%
LYB        | -40.83%
CNC        | -40.80%
UNH        | -40.38%
BAX        | -39.95%
LW         | -39.85%
CAG        | -39.67%
PYPL       | -37.52%
CPB        | -36.91%
CPRT       | -36.77%
ERIE       | -36.15%
POOL       | -36.14%
XYZ        | -35.24%
BLDR       | -34.46%
NOW        | -33.94%
GPN        | -33.75%
HPQ        | -33.59%
TGT        | -33.36%
TPL        | -31.89%
DVA        | -31.00%
SW         | -30.95%
OKE        | -30.93%
ALGN       | -30.82%
SWKS       | -30.49%
BF-B       | -30.39%
GIS        | -29.82%
MRNA       | -29.64%
ZTS        | -29.55%
ACN        | -28.67%
IP         | -28.58%
CARR       | -28.50%
PCG        | -28.19%
PAYC       | -27.52%
TYL        | -27.27%
IRM        | -27.21%
BRO        | -27.19%
VRSK       | -26.83%
CRM        | -26.79%
EIX        | -26.77%
HRL        | -26.42%
NKE        | -26.42%
KMB        | -26.19%
CDW        | -26.03%
TECH       | -25.78%
CMCSA      | -25.61%
LEN        | -25.57%

✅ SELECTION COMPLETE.

Implementing this strategy is extremely risky, as it exposes the investor to reversal risk and potential high losses. It's important to bear in mind that, as cited by Fan et al. (2012), although momentum strategies exhibit persistent profitability, their returns are volatile and prone to crash risks during specific periods. Therefore, simply selecting the top 10 stocks from these two lists feels too risky, mainly during period of market uncertainty, so I think it would make sense to also value in their last month's performance (21 trading days), which was excluded in the stock pool selection mechanism, to gain a clearer view of short-term performance. This, in my opinion, would increase the chances of picking the most likely winners (to long) and losers (to short).

Fan, M., Kearney, F., Li, Y. and Liu, J., 2022. Momentum and the Cross-section of Stock Volatility. Journal of Economic Dynamics and Control, 144, p.104524.

In [4]:
# --- BLOCK 3: MOMENTUM DIAGNOSTIC (LINKED TO BLOCK 2) ---
# Import adjustText
try:
    from adjustText import adjust_text
except ImportError:
    import subprocess, sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "adjustText"])
    from adjustText import adjust_text

print("\n" + "="*60)
print(f"{'MOMENTUM SIGNAL DIAGNOSTIC CHART':^60}")
print("="*60)

# 1. LINKING TO BLOCK 2 (REUSE VARIABLES)
required_vars = ['long_candidates', 'short_candidates', 'period_simple_return', 'TOTAL_WINDOW', 'EXCLUDE_RECENT', 'prices_universe']

if not all(var in globals() for var in required_vars):
    print("❌ Critical Error: Variables from Block 2 are missing.")
    print("   Please run Block 2 immediately before this block.")
else:
    print(f"🔗 Linked to Block 2 Logic:")
    print(f"   • Formation Window: {TOTAL_WINDOW - EXCLUDE_RECENT} days")
    print(f"   • Excluded Window:  {EXCLUDE_RECENT} days")
    
    # Combined list of tickers to plot
    target_tickers = long_candidates + short_candidates
    
    # 2. RETRIEVE DATA (NO RECALCULATION FOR X-AXIS)
    # X-AXIS: Formation Return (Already calculated in Block 2!)
    # We just subset the existing series for our targets
    long_term_vals = period_simple_return[target_tickers]
    
    # Y-AXIS: Recent Validation (Need to calculate this specific slice)
    # We use the 'prices_universe' from Block 2 to be safe
    # Calculate return for the last EXCLUDE_RECENT days (e.g., last 21 days)
    validation_prices = prices_universe[target_tickers].tail(EXCLUDE_RECENT + 1)
    short_term_vals = (validation_prices.iloc[-1] / validation_prices.iloc[0]) - 1

    # 3. ASSIGN COLORS
    colors = []
    for t in target_tickers:
        if t in long_candidates:
            colors.append('#2e7d32') # Forest Green (Long)
        elif t in short_candidates:
            colors.append('#d32f2f') # Red (Short)
        else:
            colors.append('gray')

    # 4. PLOT SCATTER
    plt.figure(figsize=(14, 10))
    ax = plt.gca()

    plt.scatter(
        long_term_vals,
        short_term_vals,
        c=colors,
        alpha=0.7,
        s=150,
        edgecolor='black',
        linewidth=0.8,
        zorder=2
    )

    

    # 5. QUADRANT LINES & ZONES
    plt.axvline(0, color='black', linestyle='--', linewidth=0.8, alpha=0.5)
    plt.axhline(0, color='black', linestyle='--', linewidth=0.8, alpha=0.5)

    # Zone Labels (Dynamic placement based on axes)
    props = dict(boxstyle='round', facecolor='white', alpha=0.7, edgecolor='none')
    
    # Top Right: Winners continuing to win
    plt.text(0.97, 0.97, "TREND CONTINUATION\n(Strong Buy)", transform=ax.transAxes, 
             ha='right', va='top', color='#2e7d32', fontweight='bold', bbox=props)
    
    # Bottom Left: Losers continuing to lose
    plt.text(0.03, 0.03, "DOWNTREND CONTINUATION\n(Strong Short)", transform=ax.transAxes, 
             ha='left', va='bottom', color='#d32f2f', fontweight='bold', bbox=props)
             
    # Top Left: Losers bouncing (Squeeze)
    plt.text(0.03, 0.97, "SHORT SQUEEZE RISK\n(Losers Bouncing)", transform=ax.transAxes, 
             ha='left', va='top', color='#f57f17', fontweight='bold', bbox=props)

    # Bottom Right: Winners crashing (Reversal)
    plt.text(0.97, 0.03, "MOMENTUM CRASH\n(Winners Dropping)", transform=ax.transAxes, 
             ha='right', va='bottom', color='#f57f17', fontweight='bold', bbox=props)

    # 6. SMART LABELS
    texts = []
    for t in target_tickers:
        texts.append(plt.text(long_term_vals[t], short_term_vals[t], t, 
                              fontsize=9, fontweight='bold', color='#333333'))
    
    try:
        adjust_text(texts, arrowprops=dict(arrowstyle='-', color='gray', alpha=0.5, lw=0.5))
    except:
        pass

    # 7. FINAL POLISH
    latest_date = prices_universe.index[-1].date()
    formation_days = TOTAL_WINDOW - EXCLUDE_RECENT
    
    plt.title(f"Diagnostic: {formation_days}-Day Trend vs {EXCLUDE_RECENT}-Day Reality (Ended {latest_date})", 
              fontsize=14, fontweight='bold')
    plt.xlabel(f"Formation Return (From Block 2)", fontsize=11, fontweight='medium')
    plt.ylabel(f"Validation Return (Last {EXCLUDE_RECENT} Days)", fontsize=11, fontweight='medium')
    plt.grid(True, alpha=0.2, linestyle=':')
    
    # Legend
    long_patch = mpatches.Patch(color='#2e7d32', label=f'Top {len(long_candidates)} Winners')
    short_patch = mpatches.Patch(color='#d32f2f', label=f'Bottom {len(short_candidates)} Losers')
    plt.legend(handles=[long_patch, short_patch], loc='center right', framealpha=0.9)

    plt.tight_layout()
    plt.show()
============================================================
              MOMENTUM SIGNAL DIAGNOSTIC CHART              
============================================================
🔗 Linked to Block 2 Logic:
   • Formation Window: 252 days
   • Excluded Window:  21 days
No description has been provided for this image

EXPLANATION OF THE CHART

The X-Axis: Formation Period Return (The "Signal") This axis answers the question: "Did this stock go up over the last year?"

  • What it measures: The long-term trend used to select the stock.
  • Time Period: From 1 year ago up to 1 month ago (Days -252 to -21).

Why we stop 1 month ago: In momentum strategies, stocks often experience a "short-term reversal" or "noise" in the most recent month. We intentionally exclude this month from the selection signal to avoid buying stocks that just spiked up on a single news event

The Y-Axis: Holding Period Return (The "Validation") This axis answers the question: "Is the stock STILL going up right now?"

  • What it measures: The very recent performance (the last month).
  • Time Period: From 1 month ago to Today (Days -21 to 0).

Why we track this: This serves as a diagnostic.

If a stock is a "Winner" (High X-Axis) but is crashing this month (Negative Y-Axis), it might be a "Falling Knife." If a stock is a "Loser" (Low X-Axis) but is rallying this month (Positive Y-Axis), it might be a "Short Squeeze."

In [5]:
# --- BLOCK 4: MOMENTUM QUADRANT FILTER (LINKED & OPTIMIZED) ---
# Import adjustText for professional labeling
try:
    from adjustText import adjust_text
except ImportError:
    import subprocess, sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "adjustText"])
    from adjustText import adjust_text

print("\n" + "="*66)
print(f"{'ADVANCED FILTER: REGIME CLASSIFICATION (LINKED LOGIC)':^66}")
print("="*66)

# 1. LINKING TO BLOCK 2 (REUSE VARIABLES)
# We strictly use the candidates and the Formation Return calculated in Block 2
# to avoid any window mismatch or calculation redundancy.
required_vars = ['long_candidates', 'short_candidates', 'period_simple_return', 'EXCLUDE_RECENT', 'prices_universe']

if not all(var in globals() for var in required_vars):
    print("❌ Critical Error: Variables from Block 2 are missing.")
    print("   Please run Block 2 immediately before this block.")
    candidates_long = []
    candidates_short = []
else:
    candidates_long = long_candidates
    candidates_short = short_candidates
    print(f"🔗 Linked to Block 2 Logic (Skip Recent: {EXCLUDE_RECENT} days).")
    print(f"📥 Input: Analyzing {len(candidates_long)} Longs and {len(candidates_short)} Shorts.")

    # 2. PREPARE METRICS (NO RECALCULATION FOR X-AXIS)
    target_tickers = candidates_long + candidates_short
    
    # X-Axis: Formation Trend (Directly from Block 2)
    # This ensures the 'Trend' is exactly what we screened for
    x_data = period_simple_return[target_tickers]

    # Y-Axis: Validation (Calculate Recent Performance)
    # We calculate the return for the 'EXCLUDE_RECENT' period (Reality Check)
    # Using the exact same prices_universe from Block 2
    validation_prices = prices_universe[target_tickers].tail(EXCLUDE_RECENT + 1)
    y_data = (validation_prices.iloc[-1] / validation_prices.iloc[0]) - 1

    # 3. CLASSIFICATION ENGINE
    def get_regime(t):
        if t not in x_data.index: return "UNKNOWN"
        x, y = x_data[t], y_data[t]
        
        if x >= 0 and y >= 0: return "TOP_RIGHT_CONT"       # ✅ Buy (Trend Up, Recent Up)
        if x >= 0 and y < 0:  return "BOTTOM_RIGHT_CRASH"   # ❌ Falling Knife (Trend Up, Recent Down)
        if x < 0 and y < 0:   return "BOTTOM_LEFT_CONT"     # ✅ Short (Trend Down, Recent Down)
        if x < 0 and y >= 0:  return "TOP_LEFT_SQZ"         # ❌ Short Squeeze (Trend Down, Recent Up)
        return "NEUTRAL"

    # 4. FILTER EXECUTION & REPORTING
    print(f"\n{'TICKER':<10} | {'TREND (FORMATION)':<18} | {'RECENT (VALIDATION)':<18} | {'STATUS'}")
    print("-" * 80)

    refined_longs = []
    refined_shorts = []
    rejects = []

    # Process Longs
    for t in candidates_long:
        regime = get_regime(t)
        if regime == "TOP_RIGHT_CONT":
            refined_longs.append(t)
            stat = "✅ KEEP"
        else:
            rejects.append(t)
            stat = f"❌ DROP ({regime})"
        print(f"{t:<10} | {x_data.get(t,0):>17.2%} | {y_data.get(t,0):>19.2%} | {stat}")

    print("-" * 80)

    # Process Shorts
    for t in candidates_short:
        regime = get_regime(t)
        if regime == "BOTTOM_LEFT_CONT":
            refined_shorts.append(t)
            stat = "✅ KEEP"
        else:
            rejects.append(t)
            stat = f"❌ DROP ({regime})"
        print(f"{t:<10} | {x_data.get(t,0):>17.2%} | {y_data.get(t,0):>19.2%} | {stat}")

    # 5. ENHANCED VISUALIZATION
    plt.figure(figsize=(12, 10))
    ax = plt.gca()

    # A. Plot "The Universe" (Context Cloud)
    # Shows where the rest of the market is, so you see if your picks are outliers
    # We grab the universe data from Block 2's period_simple_return for context
    universe_x = period_simple_return
    # We need universe Y (approximate for speed)
    universe_y = prices_universe.pct_change(EXCLUDE_RECENT).iloc[-1]
    
    plt.scatter(universe_x, universe_y, color='#e0e0e0', s=10, alpha=0.5, label='Market Universe', zorder=1)

    # B. Plot The "Survivors" (Verified)
    if refined_longs:
        plt.scatter(x_data[refined_longs], y_data[refined_longs], 
                    color='#00c853', s=150, edgecolors='black', label='Verified Longs', zorder=3)

    if refined_shorts:
        plt.scatter(x_data[refined_shorts], y_data[refined_shorts], 
                    color='#d50000', s=150, edgecolors='black', label='Verified Shorts', zorder=3)

    # C. Plot The "Rejects" (Traps)
    if rejects:
        plt.scatter(x_data[rejects], y_data[rejects], 
                    color='orange', s=100, marker='x', linewidth=2, label='Rejected (Risk)', zorder=3)

    # D. Quadrant Lines & Labels
    plt.axhline(0, color='black', linestyle='--', alpha=0.3)
    plt.axvline(0, color='black', linestyle='--', alpha=0.3)

    # Corner Labels
    plt.text(0.98, 0.98, "BUY ZONE\n(Trend Validated)", transform=ax.transAxes, ha='right', va='top', color='green', fontweight='bold', alpha=0.6)
    plt.text(0.02, 0.02, "SHORT ZONE\n(Weakness Validated)", transform=ax.transAxes, ha='left', va='bottom', color='red', fontweight='bold', alpha=0.6)
    plt.text(0.98, 0.02, "FALLING KNIFE\n(Recent Crash)", transform=ax.transAxes, ha='right', va='bottom', color='orange', fontweight='bold', alpha=0.6)
    plt.text(0.02, 0.98, "BEAR TRAP\n(Recent Bounce)", transform=ax.transAxes, ha='left', va='top', color='orange', fontweight='bold', alpha=0.6)

    # E. Smart Labels (No Overlap)
    texts = []
    for t in refined_longs + refined_shorts:
        texts.append(plt.text(x_data[t], y_data[t], t, fontweight='bold', fontsize=9))
    for t in rejects:
        texts.append(plt.text(x_data[t], y_data[t], t, color='gray', fontsize=8))

    try:
        adjust_text(texts, arrowprops=dict(arrowstyle='-', color='gray', alpha=0.5), force_text=(0.5, 1.0))
    except:
        pass

    # Final Styling
    plt.title(f"Regime Filter: Formation Trend vs Recent Validation ({EXCLUDE_RECENT} Days)", fontsize=14)
    plt.xlabel("Formation Trend Return (From Block 2)", fontsize=11)
    plt.ylabel(f"Validation Return (Last {EXCLUDE_RECENT} Days)", fontsize=11)
    plt.grid(True, alpha=0.2, linestyle=':')
    plt.legend(loc='upper left', frameon=True, framealpha=0.9)

    plt.tight_layout()
    plt.show()

    # 6. UPDATE GLOBAL VARIABLES
#    print(f"\n🔄 UPDATING TRADING LISTS...")
#    print(f"   Old Longs: {len(candidates_long)} -> New Longs: {len(refined_longs)}")
#    print(f"   Old Shorts: {len(candidates_short)} -> New Shorts: {len(refined_shorts)}")

    final_longs = refined_longs
    final_shorts = refined_shorts
#    print("✅ Ready for Optimization.")
    
    # --- 7. SAFETY CONTAINERS (USER REQUEST) ---
    # These create independent copies. Modifying 'final_longs' later will NOT affect these.
    container_longs = final_longs.copy()
    container_shorts = final_shorts.copy()
    
#    print(f"\n📦 SAFETY CONTAINERS SECURED:")
#    print(f"   Saved copy 'container_longs'  with {len(container_longs)} tickers.")
#    print(f"   Saved copy 'container_shorts' with {len(container_shorts)} tickers.")
==================================================================
      ADVANCED FILTER: REGIME CLASSIFICATION (LINKED LOGIC)       
==================================================================
🔗 Linked to Block 2 Logic (Skip Recent: 21 days).
📥 Input: Analyzing 60 Longs and 60 Shorts.

TICKER     | TREND (FORMATION)  | RECENT (VALIDATION) | STATUS
--------------------------------------------------------------------------------
WDC        |           239.60% |              25.57% | ✅ KEEP
STX        |           196.44% |              16.02% | ✅ KEEP
MU         |           183.35% |              32.68% | ✅ KEEP
HOOD       |           167.26% |             -11.69% | ❌ DROP (BOTTOM_RIGHT_CRASH)
NEM        |           158.14% |              13.44% | ✅ KEEP
WBD        |           153.47% |              -2.75% | ❌ DROP (BOTTOM_RIGHT_CRASH)
CVNA       |           126.90% |               1.52% | ✅ KEEP
PLTR       |           123.34% |             -12.85% | ❌ DROP (BOTTOM_RIGHT_CRASH)
LRCX       |           116.30% |              27.46% | ✅ KEEP
FIX        |            98.51% |              18.00% | ✅ KEEP
APP        |            90.94% |             -22.07% | ❌ DROP (BOTTOM_RIGHT_CRASH)
APH        |            81.17% |              11.52% | ✅ KEEP
TPR        |            81.05% |               0.17% | ✅ KEEP
KLAC       |            79.00% |              17.59% | ✅ KEEP
HII        |            77.30% |              17.67% | ✅ KEEP
GLW        |            76.15% |               4.01% | ✅ KEEP
HWM        |            71.27% |               5.55% | ✅ KEEP
DG         |            69.49% |              10.11% | ✅ KEEP
GE         |            69.41% |              -0.52% | ❌ DROP (BOTTOM_RIGHT_CRASH)
CVS        |            63.41% |               3.02% | ✅ KEEP
INTC       |            62.50% |              34.10% | ✅ KEEP
CAH        |            61.36% |               2.09% | ✅ KEEP
C          |            58.85% |              -5.16% | ❌ DROP (BOTTOM_RIGHT_CRASH)
AMD        |            57.55% |               8.33% | ✅ KEEP
ALB        |            56.77% |              18.07% | ✅ KEEP
TEL        |            52.02% |               0.63% | ✅ KEEP
GOOGL      |            51.23% |               2.83% | ✅ KEEP
IDXX       |            50.92% |               0.93% | ✅ KEEP
GOOG       |            50.82% |               2.45% | ✅ KEEP
RTX        |            50.10% |               6.11% | ✅ KEEP
DLTR       |            49.76% |              12.59% | ✅ KEEP
NRG        |            49.54% |              -5.46% | ❌ DROP (BOTTOM_RIGHT_CRASH)
CAT        |            49.37% |               8.67% | ✅ KEEP
GM         |            48.43% |              -5.61% | ❌ DROP (BOTTOM_RIGHT_CRASH)
CHRW       |            48.02% |               6.79% | ✅ KEEP
HCA        |            47.95% |              -1.66% | ❌ DROP (BOTTOM_RIGHT_CRASH)
IVZ        |            46.48% |               3.42% | ✅ KEEP
GS         |            45.77% |               5.02% | ✅ KEEP
STLD       |            45.60% |              -2.38% | ❌ DROP (BOTTOM_RIGHT_CRASH)
JBL        |            44.61% |               5.37% | ✅ KEEP
RL         |            43.61% |              -0.06% | ❌ DROP (BOTTOM_RIGHT_CRASH)
EXPE       |            43.51% |              -5.22% | ❌ DROP (BOTTOM_RIGHT_CRASH)
BK         |            43.15% |               3.35% | ✅ KEEP
AMAT       |            42.05% |              22.76% | ✅ KEEP
TKO        |            40.98% |              -5.34% | ❌ DROP (BOTTOM_RIGHT_CRASH)
MPWR       |            40.83% |              10.25% | ✅ KEEP
JCI        |            40.44% |              -5.06% | ❌ DROP (BOTTOM_RIGHT_CRASH)
TER        |            40.30% |              13.27% | ✅ KEEP
EL         |            39.52% |               6.74% | ✅ KEEP
AVGO       |            39.31% |              -4.42% | ❌ DROP (BOTTOM_RIGHT_CRASH)
WYNN       |            39.18% |             -10.61% | ❌ DROP (BOTTOM_RIGHT_CRASH)
WELL       |            38.89% |               1.00% | ✅ KEEP
FOXA       |            38.66% |              -2.00% | ❌ DROP (BOTTOM_RIGHT_CRASH)
COR        |            37.63% |               3.36% | ✅ KEEP
HAS        |            36.99% |               4.97% | ✅ KEEP
CMI        |            36.81% |               9.84% | ✅ KEEP
MNST       |            36.26% |               5.30% | ✅ KEEP
JNJ        |            35.18% |               6.45% | ✅ KEEP
MS         |            34.67% |               1.84% | ✅ KEEP
INCY       |            33.20% |               2.09% | ✅ KEEP
--------------------------------------------------------------------------------
TTD        |           -71.76% |              -8.44% | ✅ KEEP
FISV       |           -70.16% |              -4.40% | ✅ KEEP
DECK       |           -55.35% |              -2.31% | ✅ KEEP
ARE        |           -53.26% |              21.89% | ❌ DROP (TOP_LEFT_SQZ)
IT         |           -52.65% |             -11.06% | ✅ KEEP
MOH        |           -48.81% |              17.06% | ❌ DROP (TOP_LEFT_SQZ)
LULU       |           -48.31% |             -10.91% | ✅ KEEP
CHTR       |           -47.35% |              -9.92% | ✅ KEEP
FDS        |           -43.07% |              -4.39% | ✅ KEEP
CLX        |           -42.96% |              13.17% | ❌ DROP (TOP_LEFT_SQZ)
DOW        |           -42.59% |              16.52% | ❌ DROP (TOP_LEFT_SQZ)
ZBRA       |           -42.12% |              -2.89% | ✅ KEEP
GDDY       |           -41.87% |             -18.04% | ✅ KEEP
CMG        |           -41.82% |               3.60% | ❌ DROP (TOP_LEFT_SQZ)
STZ        |           -40.87% |              17.98% | ❌ DROP (TOP_LEFT_SQZ)
LYB        |           -40.83% |              15.00% | ❌ DROP (TOP_LEFT_SQZ)
CNC        |           -40.80% |              16.00% | ❌ DROP (TOP_LEFT_SQZ)
UNH        |           -40.38% |               4.60% | ❌ DROP (TOP_LEFT_SQZ)
BAX        |           -39.95% |               2.77% | ❌ DROP (TOP_LEFT_SQZ)
LW         |           -39.85% |               4.49% | ❌ DROP (TOP_LEFT_SQZ)
CAG        |           -39.67% |               1.51% | ❌ DROP (TOP_LEFT_SQZ)
PYPL       |           -37.52% |              -6.93% | ✅ KEEP
CPB        |           -36.91% |              -2.59% | ✅ KEEP
CPRT       |           -36.77% |               4.38% | ❌ DROP (TOP_LEFT_SQZ)
ERIE       |           -36.15% |               2.01% | ❌ DROP (TOP_LEFT_SQZ)
POOL       |           -36.14% |              13.51% | ❌ DROP (TOP_LEFT_SQZ)
XYZ        |           -35.24% |              -3.19% | ✅ KEEP
BLDR       |           -34.46% |              19.76% | ❌ DROP (TOP_LEFT_SQZ)
NOW        |           -33.94% |             -18.45% | ✅ KEEP
GPN        |           -33.75% |             -10.28% | ✅ KEEP
HPQ        |           -33.59% |             -13.32% | ✅ KEEP
TGT        |           -33.36% |              16.39% | ❌ DROP (TOP_LEFT_SQZ)
TPL        |           -31.89% |              10.70% | ❌ DROP (TOP_LEFT_SQZ)
DVA        |           -31.00% |              -9.06% | ✅ KEEP
SW         |           -30.95% |               7.50% | ❌ DROP (TOP_LEFT_SQZ)
OKE        |           -30.93% |               1.20% | ❌ DROP (TOP_LEFT_SQZ)
ALGN       |           -30.82% |               6.01% | ❌ DROP (TOP_LEFT_SQZ)
SWKS       |           -30.49% |             -10.62% | ✅ KEEP
BF-B       |           -30.39% |               2.46% | ❌ DROP (TOP_LEFT_SQZ)
GIS        |           -29.82% |              -2.98% | ✅ KEEP
MRNA       |           -29.64% |              33.68% | ❌ DROP (TOP_LEFT_SQZ)
ZTS        |           -29.55% |               0.57% | ❌ DROP (TOP_LEFT_SQZ)
ACN        |           -28.67% |               1.10% | ❌ DROP (TOP_LEFT_SQZ)
IP         |           -28.58% |               5.98% | ❌ DROP (TOP_LEFT_SQZ)
CARR       |           -28.50% |               4.35% | ❌ DROP (TOP_LEFT_SQZ)
PCG        |           -28.19% |              -3.18% | ✅ KEEP
PAYC       |           -27.52% |              -7.26% | ✅ KEEP
TYL        |           -27.27% |              -6.95% | ✅ KEEP
IRM        |           -27.21% |              14.73% | ❌ DROP (TOP_LEFT_SQZ)
BRO        |           -27.19% |              -0.68% | ✅ KEEP
VRSK       |           -26.83% |               0.40% | ❌ DROP (TOP_LEFT_SQZ)
CRM        |           -26.79% |             -16.14% | ✅ KEEP
EIX        |           -26.77% |               1.61% | ❌ DROP (TOP_LEFT_SQZ)
HRL        |           -26.42% |               3.65% | ❌ DROP (TOP_LEFT_SQZ)
NKE        |           -26.42% |              11.40% | ❌ DROP (TOP_LEFT_SQZ)
KMB        |           -26.19% |               1.76% | ❌ DROP (TOP_LEFT_SQZ)
CDW        |           -26.03% |              -9.34% | ✅ KEEP
TECH       |           -25.78% |              17.87% | ❌ DROP (TOP_LEFT_SQZ)
CMCSA      |           -25.61% |               3.87% | ❌ DROP (TOP_LEFT_SQZ)
LEN        |           -25.57% |              10.37% | ❌ DROP (TOP_LEFT_SQZ)
No description has been provided for this image

HOW TO USE THIS CHART IN PRACTICE:

  • Top-Right (Green Zone): X is High: Stock was a winner last year. Y is Positive: Stock is still winning this month. Verdict: Perfect Long Candidate. The trend is intact.

  • Bottom-Right (Orange Zone - "Momentum Crash"): X is High: Stock was a winner last year. Y is Negative: Stock crashed this month. Verdict: Danger. The trend might be ending.

  • Bottom-Left (Red Zone): X is Low: Stock was a loser last year. Y is Negative: Stock is still losing this month. Verdict: Perfect Short Candidate. The downtrend is persistent.

  • Top-Left (Orange Zone - "Short Squeeze Risk") X is Low (Negative): The stock was a Loser last year. It has a strong downtrend signal, which normally makes it a perfect candidate to Short (sell). Y is Positive (High): Despite the long-term crash, the stock has suddenly rallied in the last month.

########################################################################################################

This portfolio put together stocks that are starting to go up (Momentum) that at the same time are fundamentally cheap (Intrinsic value). The strategy is inspired to Eq1 and Eq2 in the journal of finance 68, no. 3 which uses the cross-sectional ranks minus their average to weights securities proportionally in zero-cost portfolios. For the composite score $$Score = (0.5 \times \text{Momentum Rank}) + (0.5 \times \text{Value Rank})$$ I used the eEq3 in the journal of finance 68, no. 3 which gives 50% weight to the security's book-to-market and 50% weight to its momentum score. (Asness et al, 2013) This strategy shoud give insight into a strategy that reduce long-term risks for Value investors. The weights are allocated in the following way: $$\text{Weight} = \frac{\text{Individual Score}}{\text{Total Sum of Scores}}$$

Asness, Clifford S., Tobias J. Moskowitz, and Lasse Heje Pedersen. "Value and momentum everywhere." The journal of finance 68, no. 3 (2013): 929-985.

In [6]:
# --- BLOCK 5: VALUE & MOMENTUM SCORING (ROBUST FAILSAFE) ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import time
from tqdm import tqdm

print("\n" + "="*66)
print(f"{'MULTI-FACTOR SCORING: VALUE & MOMENTUM EVERYWHERE':^66}")
print("="*66)

# 0. CONFIGURATION
TARGET_COUNT = 10   

# 1. SETUP UNIVERSES
# Ensure we are using the refined lists from Block 4
if 'refined_longs' not in locals():
    print("⚠️ 'refined_longs' not found. Using 'final_longs' if available...")
    raw_longs = final_longs if 'final_longs' in locals() else []
    raw_shorts = final_shorts if 'final_shorts' in locals() else []
else:
    raw_longs = refined_longs
    raw_shorts = refined_shorts

all_candidates = list(set(raw_longs + raw_shorts))
print(f"📊 Candidate Pool: {len(all_candidates)} Tickers")

if not all_candidates:
    print("❌ No active tickers found. Please run Block 4 first.")
else:
    # 2. FETCH FUNDAMENTAL DATA (WITH RATE LIMITING)
    print("   > Fetching Book-to-Market data (Values > 0.0)...")
    bm_ratios = {}
    fetch_success = 0
    
    # We use a progress bar to track status
    for t in tqdm(all_candidates):
        try:
            # 1. Polite Delay (Prevents API blocking)
            time.sleep(0.1) 
            
            ticker = yf.Ticker(t)
            pb = None
            
            # 2. Try 'fast_info' (Newer API, faster)
            if hasattr(ticker, 'fast_info'):
                try:
                    pb = ticker.fast_info.get('priceToBook', None)
                except:
                    pass
            
            # 3. Fallback to 'info' (Older API, detailed)
            if pb is None or str(pb) == 'nan':
                try:
                    info = ticker.info
                    pb = info.get('priceToBook')
                except:
                    pass
            
            # 4. Repair Attempt (Add .L for UK stocks if missing)
            if pb is None and not t.endswith('.L') and len(t) < 5:
                try:
                    # Try adding .L suffix
                    uk_ticker = yf.Ticker(t + ".L")
                    if hasattr(uk_ticker, 'fast_info'):
                        pb = uk_ticker.fast_info.get('priceToBook', None)
                except:
                    pass

            # 5. Store Result
            if pb is not None and isinstance(pb, (int, float)) and pb > 0:
                bm_ratios[t] = 1.0 / pb  # Book-to-Market
                fetch_success += 1
            else:
                bm_ratios[t] = np.nan
                
        except Exception as e:
            # Silent fail to keep loop moving
            bm_ratios[t] = np.nan

    # 3. CHECK DATA HEALTH
    s_bm = pd.Series(bm_ratios)
    
    # Logic: Did we get enough data to be useful?
    valid_count = s_bm.count()
    print(f"\n🔎 Data Report: Successfully fetched {valid_count}/{len(all_candidates)} items.")
    
    if valid_count < (len(all_candidates) * 0.2):
        print("⚠️ CRITICAL: Less than 20% of data retrieved.")
        print("   >>> SWITCHING TO PURE MOMENTUM MODE to avoid bad math.")
        MODE_VALUE = False
        s_bm = pd.Series(1.0, index=all_candidates) # Neutral filler
    else:
        print("✅ Value Data looks healthy. Proceeding with Multi-Factor Model.")
        # Fill missing NaNs with the Median of the sector/universe
        median_bm = s_bm.median()
        s_bm = s_bm.fillna(median_bm)
        MODE_VALUE = True

    # 4. CALCULATE FACTORS (CENTERED RANKS)
    # Ensure indices match
    valid_targets = [t for t in all_candidates if t in x_data.index and t in y_data.index]
    
    x_act = x_data[valid_targets] # Trend
    y_act = y_data[valid_targets] # Recency
    
    rank_mom_trend = x_act.rank(pct=True) - 0.5
    rank_mom_recent = y_act.rank(pct=True) - 0.5
    
    # Momentum Factor (70/30)
    factor_momentum = (0.7 * rank_mom_trend) + (0.3 * rank_mom_recent)
    
    # Value Factor (Book-to-Market)
    # Realign s_bm to the valid targets
    s_bm_aligned = s_bm.reindex(valid_targets).fillna(s_bm.median())
    rank_value = s_bm_aligned.rank(pct=True) - 0.5
    factor_value = rank_value
    
    # 5. COMPOSITE SCORE
    if MODE_VALUE:
        # Multi-Factor: 50% Mom + 50% Value
        final_score = (0.5 * factor_momentum) + (0.5 * factor_value)
    else:
        # Pure Momentum Fallback
        final_score = factor_momentum

    # 6. FILTER & SELECT
    final_score = final_score.dropna().sort_values(ascending=False)
    
    # Longs: Take best from the 'Long Candidates' pool
    long_pool = [t for t in final_score.index if t in raw_longs]
    final_longs = final_score.loc[long_pool].nlargest(TARGET_COUNT).index.tolist()
    
    # Shorts: Take worst from the 'Short Candidates' pool
    # Note: For shorts, a LOW score is better (High Price-to-Book / Low Momentum)
    short_pool = [t for t in final_score.index if t in raw_shorts]
    final_shorts = final_score.loc[short_pool].nsmallest(TARGET_COUNT).index.tolist()
    
    final_tickers = final_longs + final_shorts
    
    print(f"✂️  Filtering Complete: Keeping Top {len(final_longs)} Longs & Top {len(final_shorts)} Shorts.")

    # 7. WEIGHTING (Rank-Based)
    final_ranks = final_score[final_tickers]
    side = pd.Series(1.0, index=final_tickers)
    for t in final_shorts: side[t] = -1.0 

    signal_prior = side * final_ranks.abs()
    
    # Normalize to 100% Gross Exposure
    if signal_prior.abs().sum() > 0:
        signal_prior /= signal_prior.abs().sum()
    else:
        signal_prior[:] = 1.0 / len(signal_prior) * side

    # 8. REPORTING
    sorted_priors = signal_prior.sort_values(ascending=True)

    print(f"\n{'TICKER':<8} | {'SIDE':<5} | {'MOMENTUM':<8} | {'VALUE':<8} | {'COMPOSITE':<9} | {'WEIGHT'}")
    print("-" * 70)
    
    for t, w in sorted_priors.items():
        direction = "SHORT" if w < 0 else "LONG"
        mom_s = factor_momentum.get(t, 0)
        
        if MODE_VALUE:
            val_s = factor_value.get(t, 0)
            comp_s = final_score.get(t, 0)
            val_str = f"{val_s:>8.2f}"
        else:
            val_str = "  N/A   "
            comp_s = mom_s # In fallback, composite is just momentum
            
        print(f"{t:<8} | {direction:<5} | {mom_s:>8.2f} | {val_str} | {comp_s:>9.2f} | {w:>8.1%}")

    # 9. VISUALIZATION
    plt.figure(figsize=(12, 8))
    colors = ['#d32f2f' if w < 0 else '#2e7d32' for w in sorted_priors]
    bars = plt.barh(sorted_priors.index, sorted_priors.values, color=colors, alpha=0.85, edgecolor='black')
    
    plt.axvline(0, color='black', linewidth=1.2)
    plt.grid(axis='x', alpha=0.2, linestyle='--')
    
    for bar, val in zip(bars, sorted_priors.values):
        offset = 0.002 if val >= 0 else -0.002
        ha = 'left' if val >= 0 else 'right'
        plt.text(val + offset, bar.get_y() + bar.get_height()/2, 
                 f"{val:.1%}", va='center', ha=ha, fontsize=9, fontweight='bold', color='#333333')

    title_mode = "Multi-Factor (Value + Mom)" if MODE_VALUE else "Pure Momentum (Value Data Missing)"
    plt.title(f"Allocation Strategy: {title_mode}", fontsize=14, pad=15)
    plt.xlabel("Allocated Weight %", fontsize=11)
    plt.tight_layout()
    plt.show()
    
    # Save for Forecast Block
    global_scores = final_score
    print(f"\n✅ Scoring Complete. Ready for Forecast.")
==================================================================
        MULTI-FACTOR SCORING: VALUE & MOMENTUM EVERYWHERE         
==================================================================
📊 Candidate Pool: 67 Tickers
   > Fetching Book-to-Market data (Values > 0.0)...
100%|██████████████████████████████████████████████████████████████████████████████████| 67/67 [00:32<00:00,  2.09it/s]
🔎 Data Report: Successfully fetched 63/67 items.
✅ Value Data looks healthy. Proceeding with Multi-Factor Model.
✂️  Filtering Complete: Keeping Top 10 Longs & Top 10 Shorts.

TICKER   | SIDE  | MOMENTUM | VALUE    | COMPOSITE | WEIGHT
----------------------------------------------------------------------
GDDY     | SHORT |    -0.40 |    -0.49 |     -0.44 |    -8.9%
IT       | SHORT |    -0.44 |    -0.40 |     -0.42 |    -8.4%
NOW      | SHORT |    -0.36 |    -0.22 |     -0.29 |    -5.8%
TTD      | SHORT |    -0.44 |    -0.04 |     -0.24 |    -4.8%
LULU     | SHORT |    -0.42 |     0.10 |     -0.16 |    -3.3%
HPQ      | SHORT |    -0.33 |     0.01 |     -0.16 |    -3.2%
DECK     | SHORT |    -0.37 |     0.05 |     -0.16 |    -3.2%
DVA      | SHORT |    -0.28 |     0.01 |     -0.14 |    -2.8%
CDW      | SHORT |    -0.20 |    -0.05 |     -0.13 |    -2.6%
FDS      | SHORT |    -0.35 |     0.11 |     -0.12 |    -2.4%
GS       | LONG  |     0.09 |     0.28 |      0.18 |     3.7%
BK       | LONG  |     0.05 |     0.37 |      0.21 |     4.2%
STX      | LONG  |     0.45 |     0.01 |      0.23 |     4.6%
DG       | LONG  |     0.30 |     0.17 |      0.23 |     4.7%
IVZ      | LONG  |     0.09 |     0.47 |      0.28 |     5.7%
ALB      | LONG  |     0.29 |     0.29 |      0.29 |     5.9%
HII      | LONG  |     0.36 |     0.23 |      0.30 |     6.0%
NEM      | LONG  |     0.42 |     0.19 |      0.31 |     6.2%
CVS      | LONG  |     0.21 |     0.44 |      0.33 |     6.6%
INTC     | LONG  |     0.34 |     0.34 |      0.34 |     6.9%
No description has been provided for this image
✅ Scoring Complete. Ready for Forecast.
In [7]:
# --- BLOCK 5-a: DATA INTEGRITY AUDIT (BOOK-TO-MARKET CHECK) ---
print("\n" + "="*60)
print(f"{'DATA AUDIT: BOOK-TO-MARKET FETCH RESULTS':^60}")
print("="*60)

# 1. RETRIEVE DATA
if 'bm_ratios' not in locals():
    print("❌ Error: 'bm_ratios' missing. Please run Block 5 first.")
else:
    # Convert dictionary to Series for easy counting
    audit_series = pd.Series(bm_ratios)
    
    total_count = len(audit_series)
    missing_count = audit_series.isna().sum()
    success_count = total_count - missing_count
    success_rate = success_count / total_count if total_count > 0 else 0

    # 2. PRINT SUMMARY
    print(f"📊 TOTAL CANDIDATES: {total_count}")
    print(f"✅ SUCCESSFUL FETCH: {success_count} ({success_rate:.1%})")
    print(f"❌ FAILED / MISSING: {missing_count} ({1 - success_rate:.1%})")
    print("-" * 60)

    # 3. LIST THE "LOSERS" (MISSING DATA)
    if missing_count > 0:
        missing_tickers = audit_series[audit_series.isna()].index.tolist()
        print(f"⚠️  MISSING DATA FOR ({len(missing_tickers)}):")
        # Print nicely formatted list (10 per line)
        for i in range(0, len(missing_tickers), 10):
            print(f"    {', '.join(missing_tickers[i:i+10])}")
            
        print("\n💡 NOTE: These stocks were filled with the MEDIAN Book-to-Market")
        print(f"    ratio ({s_bm.median():.2f}) to prevent the strategy from crashing.")
        print("    This is a neutral assumption (they get a 'Middle of the Pack' Value rank).")
    else:
        print("🎉 PERFECT DATA! No missing values.")

    # 4. SPOT CHECK (THE WINNERS)
    print("-" * 60)
    print("🔎 SPOT CHECK (TOP 5 DEEP VALUE STOCKS FOUND):")
    # Show the 5 highest Book-to-Market (Cheapest) stocks actually found
    top_value = audit_series.dropna().sort_values(ascending=False).head(5)
    for t, val in top_value.items():
        print(f"    {t:<8} | B/M Ratio: {val:.2f} (P/B: {1/val:.2f})")
    
    print("="*60)
============================================================
          DATA AUDIT: BOOK-TO-MARKET FETCH RESULTS          
============================================================
📊 TOTAL CANDIDATES: 67
✅ SUCCESSFUL FETCH: 63 (94.0%)
❌ FAILED / MISSING: 4 (6.0%)
------------------------------------------------------------
⚠️  MISSING DATA FOR (4):
    HPQ, CAH, DVA, STX

💡 NOTE: These stocks were filled with the MEDIAN Book-to-Market
    ratio (0.16) to prevent the strategy from crashing.
    This is a neutral assumption (they get a 'Middle of the Pack' Value rank).
------------------------------------------------------------
🔎 SPOT CHECK (TOP 5 DEEP VALUE STOCKS FOUND):
    GPN      | B/M Ratio: 1.34 (P/B: 0.75)
    PCG      | B/M Ratio: 0.91 (P/B: 1.10)
    IVZ      | B/M Ratio: 0.89 (P/B: 1.13)
    FISV     | B/M Ratio: 0.73 (P/B: 1.38)
    CVS      | B/M Ratio: 0.72 (P/B: 1.39)
============================================================

--- CONFIGURATION WALK-FORWARD ANALYSIS (ROLLING OPTIMIZATION)---¶

The logic here is designed to be an "Honest Audit" of your core engine. It strips away the complex, potentially biased "Value" data (since we can't accurately get that for the past - not for free) and tests the raw horsepower of your Momentum Strategy over history.

Here is the step-by-step breakdown of exactly how the computer is thinking and executing this simulation.

  1. The Logic: "The 70/30 Momentum Engine" This strategy believes that stock price moves are not random; they follow a specific physics. It looks for two forces acting together:

Force A: The Deep Trend (70% Weight)

The Math: Return from 12 months ago to 1 month ago (t-12 to t-1).

The Logic: "Is this stock fundamentally winning over the long haul?" This filters out short-term noise and finds stocks with institutional backing.

Force B: The Recent Spark (30% Weight)

The Math: Return from 1 month ago to today (t-1 to t).

The Logic: "Is the trend accelerating?" We don't want a stock that went up last year but has been dead for 30 days. We want active heat.

The Selection Process: Every month, the code looks at the entire market (e.g., 500 stocks) and calculates this Composite Score for every single one.

Buy the 10 strongest

Short the 10 worse

  1. The Backtest: "The Walk-Forward Simulation" This is the most critical part. The code does not look at the whole history at once. It pretends to live through history day by day.

The Timeline:

Start Date: It goes back to the first day where it has enough data (Year 1).

Rebalance Day (e.g., Jan 31, 2015):

The algorithm "wakes up."

It looks only at data available before Jan 31.

It picks the Top 10 / Bottom 10 stocks based on that past data.

It buys them.

The "Walk" (Feb 1 - Feb 28, 2015):

It holds those stocks for exactly one month.

It records the profit or loss from those specific stocks.

Repeat: On Feb 28, it sells everything, re-runs the scan, picks new winners/losers, and holds for March.

  1. Why Long-Short vs. Long-Only? The block runs two parallel simulations to show you the difference:

Long-Only (The Green Line):

It just buys the Top 10 winners.

Logic: "I want maximum growth."

Risk: If the market crashes (like 2008 or 2020), this line will crash hard because it has no protection.

Long-Short (The Purple Line):

It buys the Top 10 winners AND shorts the Bottom 10 losers.

Logic: 0 cost portfolio, net long-short is 0

The Hedge: If the market crashes 20%, your Longs lose money, BUT your Shorts (the bad stocks) likely crash even harder (e.g., 30%), making you a profit on the way down.

Result: A smoother equity curve that is safer during crashes, but might lag during raging bull markets.

This won't hence be a perfect backtest as we are missing a key part of the equation, on the other hand we cannot assume that the Book-to-market todays is the same it is today, as this can change dramatically over time. By removing the "Value" factor from this specific block, we are answering the question: Even without this data, does the momentum strategy based on price action alone have an edge?

In [8]:
# --- BLOCK 6: STRATEGY SIMULATION (CORRECTED TRADING DATES) ---
try:
    from tqdm import tqdm
    tqdm_available = True
except ImportError:
    tqdm_available = False

print("\n" + "="*80)
print(f"{'FINAL REPORT: PURE MOMENTUM BACKTEST (HONEST SIMULATION)':^80}")
print("="*80)

# 1. SETUP PARAMETERS
LOOKBACK_WINDOW = 252   # 1 Year Trend
TOP_N_SELECT    = 10    # Top 10 Winners / Bottom 10 Losers

# Safety Check for Risk-Free Rate
if 'raw_tnx' not in locals():
    print("⚠️ 'raw_tnx' missing. Using default 4% rate.")
    rf_data = 0.04
else:
    rf_data = raw_tnx

# --- 2. VECTORIZED SIGNAL GENERATION ---
if 'prices_stocks' in locals():
    print("... Pre-calculating Momentum Factors ...")
    
    # Standard Asness Momentum
    p_lag_1m = prices_stocks.shift(21)
    p_lag_1y = prices_stocks.shift(252 + 21)
    factor_trend = (p_lag_1m / p_lag_1y) - 1
    factor_recent = (prices_stocks / p_lag_1m) - 1

else:
    print("❌ Critical Error: 'prices_stocks' not found.")
    factor_trend, factor_recent = pd.DataFrame(), pd.DataFrame()

# --- 3. THE WEIGHTING ENGINE ---
def get_weights_at_date(date, mode="LONG-SHORT"):
    if date not in factor_trend.index: return None
    
    row_trend = factor_trend.loc[date]
    row_recent = factor_recent.loc[date]
    
    valid_mask = row_trend.notna() & row_recent.notna()
    if valid_mask.sum() < TOP_N_SELECT * 2: return None
    
    x = row_trend[valid_mask]
    y = row_recent[valid_mask]
    
    # 70/30 Composite Score
    rank_trend = x.rank(pct=True) - 0.5
    rank_recent = y.rank(pct=True) - 0.5
    final_score = (0.7 * rank_trend) + (0.3 * rank_recent)
    
    longs = final_score.nlargest(TOP_N_SELECT).index
    shorts = final_score.nsmallest(TOP_N_SELECT).index
    
    selected = list(longs) if mode == "LONG-ONLY" else list(longs) + list(shorts)
    
    conviction = final_score[selected]
    sides = pd.Series(1.0, index=selected)
    
    if mode == "LONG-SHORT":
        for t in shorts: sides[t] = -1.0
            
    raw_weights = sides * conviction.abs()
    return raw_weights / raw_weights.abs().sum()

# --- 4. THE SIMULATION LOOP (ROBUST DATES) ---
def run_fast_backtest(mode):
    all_returns = prices_stocks.pct_change(fill_method=None).fillna(0)
    
    # --- CRITICAL FIX: FIND LAST TRADING DAY OF EVERY MONTH ---
    # We ignore calendar days and look strictly at the index
    start_idx = 273
    subset_dates = prices_stocks.index[start_idx:]
    
    # Group by Year-Month and pick the max index (Last Trading Day)
    rebalance_dates = subset_dates.to_series().groupby(
        [subset_dates.year, subset_dates.month]
    ).apply(lambda x: x.max())
    
    rebalance_dates = rebalance_dates.sort_values().unique()
    
    out_of_sample_returns = []
    
    print(f"   > Simulating [{mode}] ({len(rebalance_dates)} rebalances)...")
    iterator = tqdm(range(len(rebalance_dates) - 1)) if tqdm_available else range(len(rebalance_dates) - 1)
    
    for i in iterator:
        rebal_date = rebalance_dates[i]
        next_date  = rebalance_dates[i+1]
        
        weights = get_weights_at_date(rebal_date, mode)
        
        # If weights are missing, we carry over cash (flat return)
        # But now that we use valid dates, this should rarely happen.
        if weights is None: continue
            
        mask = (all_returns.index > rebal_date) & (all_returns.index <= next_date)
        period_rets = all_returns.loc[mask]
        
        if period_rets.empty: continue
        
        valid_tickers = weights.index.intersection(period_rets.columns)
        period_pnl = period_rets[valid_tickers].dot(weights[valid_tickers])
        out_of_sample_returns.append(period_pnl)
        
    return pd.concat(out_of_sample_returns) if out_of_sample_returns else pd.Series(dtype=float)

# --- 5. EXECUTION & REPORTING ---
if 'prices_stocks' in locals():
    wf_rets_lo = run_fast_backtest("LONG-ONLY")
    wf_rets_ls = run_fast_backtest("LONG-SHORT")
    
    # Benchmark
    if not wf_rets_ls.empty:
        s_date, e_date = wf_rets_ls.index[0], wf_rets_ls.index[-1]
        try:
            bench_df = yf.download(["^GSPC", "GBPUSD=X"], start=s_date, end=e_date, progress=False, auto_adjust=True)
            if isinstance(bench_df.columns, pd.MultiIndex):
                bench_df = bench_df.xs('Close', axis=1, level=0)
            elif 'Close' in bench_df.columns:
                bench_df = bench_df['Close']
            bench_df = bench_df.ffill().bfill()
            spx_gbp = bench_df["^GSPC"] / bench_df["GBPUSD=X"]
            bench_rets = spx_gbp.pct_change().fillna(0).reindex(wf_rets_ls.index).fillna(0)
        except:
            bench_rets = pd.Series(0, index=wf_rets_ls.index)

        # Curves
        rf_daily = float(rf_data) / 252 if isinstance(rf_data, (float, int)) else 0.04/252
        curve_lo = 10000 * (1 + wf_rets_lo).cumprod()
        curve_ls = 10000 * (1 + wf_rets_ls + rf_daily).cumprod()
        curve_bn = 10000 * (1 + bench_rets).cumprod()

        # Visualization
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), height_ratios=[2, 1])
        
        ax1.plot(curve_lo, color='#2e7d32', label='Long-Only', linewidth=2)
        ax1.plot(curve_ls, color='#6a1b9a', label='Long-Short', linewidth=2)
        ax1.plot(curve_bn, color='gray', linestyle='--', label='S&P 500 (GBP)', alpha=0.7)
        ax1.set_title("Strategy Backtest (Fixed Date Alignment)", fontsize=14, fontweight='bold')
        ax1.set_ylabel("Portfolio Value (£)")
        ax1.legend()
        ax1.grid(True, alpha=0.15)
        
        dd_lo = curve_lo / curve_lo.cummax() - 1
        dd_ls = curve_ls / curve_ls.cummax() - 1
        ax2.fill_between(dd_lo.index, dd_lo, 0, color='#2e7d32', alpha=0.3)
        ax2.plot(dd_ls, color='#6a1b9a', linewidth=1)
        ax2.set_title("Drawdowns", fontsize=12, fontweight='bold')
        ax2.grid(True, alpha=0.15)
        
        plt.tight_layout()
        plt.show()
        
        def get_cagr(curve):
            days = (curve.index[-1] - curve.index[0]).days
            return (curve.iloc[-1]/curve.iloc[0])**(365/days) - 1 if days > 0 else 0
            
        print(f"\n{'METRIC':<15} | {'LONG-ONLY':<12} | {'LONG-SHORT':<12} | {'BENCHMARK':<12}")
        print("-" * 60)
        print(f"{'CAGR':<15} | {get_cagr(curve_lo):<12.2%} | {get_cagr(curve_ls):<12.2%} | {get_cagr(curve_bn):<12.2%}")
        print("-" * 60)

else:
    print("❌ Critical Error: 'prices_stocks' missing.")
================================================================================
            FINAL REPORT: PURE MOMENTUM BACKTEST (HONEST SIMULATION)            
================================================================================
... Pre-calculating Momentum Factors ...
   > Simulating [LONG-ONLY] (61 rebalances)...
100%|█████████████████████████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 135.55it/s]
   > Simulating [LONG-SHORT] (61 rebalances)...
100%|█████████████████████████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 119.54it/s]
No description has been provided for this image
METRIC          | LONG-ONLY    | LONG-SHORT   | BENCHMARK   
------------------------------------------------------------
CAGR            | 33.48%       | 9.31%        | 13.03%      
------------------------------------------------------------
In [9]:
# --- BLOCK 7: TRADING JOURNAL (FULL DETAIL FIX) ---
print("\n" + "="*80)
print(f"{'TRADING JOURNAL: MONTHLY PERFORMANCE LOG':^80}")
print("="*80)

# 1. DATA SAFETY CHECK & RELOAD
if 'prices_stocks' not in locals():
    print("❌ Critical Error: 'prices_stocks' missing.")
else:
    # RE-CALCULATE FACTORS LOCALLY (Ensures they exist for lookup)
    # We use .normalize() to strip time components (e.g. 16:00:00 -> 00:00:00)
    prices_safe = prices_stocks.copy()
    prices_safe.index = prices_safe.index.normalize()
    
    p_lag_1m = prices_safe.shift(21)
    p_lag_1y = prices_safe.shift(252 + 21)
    
    # Re-build factors
    f_trend = (p_lag_1m / p_lag_1y) - 1
    f_recent = (prices_safe / p_lag_1m) - 1
    
    # SETTINGS
    display_months = 4
    TOP_N = 10
    
    # 2. LOCAL WEIGHT GETTER (Time-Zone Insensitive)
    def get_weights_debug(target_date, mode):
        target = pd.Timestamp(target_date).normalize()
        
        # 1. Index Check
        if target not in f_trend.index:
            try:
                # Find closest preceding date if exact match fails
                loc_idx = f_trend.index.get_indexer([target], method='pad')[0]
                target = f_trend.index[loc_idx]
            except:
                return None, "Date Not Found"

        # 2. Retrieve Data
        row_trend = f_trend.loc[target]
        row_recent = f_recent.loc[target]
        
        # 3. Validation
        valid = row_trend.notna() & row_recent.notna()
        if valid.sum() < TOP_N * 2:
            return None, "Insufficient Data"
            
        # 4. Calculation (Pure Momentum)
        x = row_trend[valid]
        y = row_recent[valid]
        
        rank_trend = x.rank(pct=True) - 0.5
        rank_recent = y.rank(pct=True) - 0.5
        score = (0.7 * rank_trend) + (0.3 * rank_recent)
        
        longs = score.nlargest(TOP_N).index
        shorts = score.nsmallest(TOP_N).index
        
        selected = list(longs) if mode == "LONG-ONLY" else list(longs) + list(shorts)
        
        # Weighting
        w = score[selected].abs()
        w = w / w.sum()
        
        # Apply Direction
        out = pd.Series(0.0, index=selected)
        for t, val in w.items():
            if t in shorts and mode == "LONG-SHORT":
                out[t] = -val
            else:
                out[t] = val
                
        return out, "Success"

    # 3. REPORT GENERATION
    if 'curve_lo' in locals() and 'curve_ls' in locals():
        # Resample curve to Month End to get reporting buckets
        report_dates = curve_lo.resample('ME').last().index[-(display_months+1):]
        
        prev_w_lo = pd.Series(dtype=float)
        prev_w_ls = pd.Series(dtype=float)

        def fmt(w):
            if w is None or w.empty: return "Cash"
            s = w.iloc[(-w.abs()).argsort()]
            return ", ".join([f"{t}({v:+.1%})" for t, v in s.items()])

        def classify(curr, prev):
            curr = curr if curr is not None else pd.Series(dtype=float)
            prev = prev if prev is not None else pd.Series(dtype=float)
            all_t = set(curr.index) | set(prev.index)
            
            l_buy, l_sell = [], []
            s_entry, s_cover = [], []

            for t in all_t:
                w_n = curr.get(t,0)
                w_o = prev.get(t,0)
                
                # Long Logic
                if w_o <= 0.001 and w_n > 0.001: l_buy.append(t)
                elif w_o > 0.001 and w_n <= 0.001: l_sell.append(t)
                
                # Short Logic
                if w_o >= -0.001 and w_n < -0.001: s_entry.append(t)
                elif w_o < -0.001 and w_n >= -0.001: s_cover.append(t)

            return ", ".join(l_buy) if l_buy else "-", ", ".join(l_sell) if l_sell else "-", \
                   ", ".join(s_entry) if s_entry else "-", ", ".join(s_cover) if s_cover else "-"

        print(f"Generating Journal for last {display_months} months...")
        
        for i in range(len(report_dates) - 1):
            d_start = report_dates[i]
            d_end = report_dates[i+1]
            
            # PnL Retrieval (Both Strategies)
            try:
                # Long Only
                lo_val_s = curve_lo.asof(d_start)
                lo_val_e = curve_lo.asof(d_end)
                lo_ret = (lo_val_e / lo_val_s) - 1
                lo_pnl = lo_val_e - lo_val_s
                
                # Long Short
                ls_val_s = curve_ls.asof(d_start)
                ls_val_e = curve_ls.asof(d_end)
                ls_ret = (ls_val_e / ls_val_s) - 1
                ls_pnl = ls_val_e - ls_val_s
            except:
                continue

            # Weight Retrieval
            w_lo, status = get_weights_debug(d_start, "LONG-ONLY")
            w_ls, _      = get_weights_debug(d_start, "LONG-SHORT")
            
            # Trade Classification
            lo_buys, lo_sells, _, _ = classify(w_lo, prev_w_lo)
            ls_buys, ls_sells, ls_shorts, ls_covers = classify(w_ls, prev_w_ls)
            
            # Update Prev
            prev_w_lo = w_lo
            prev_w_ls = w_ls

            print(f"\n📅 PERIOD: {d_start.strftime('%Y-%m-%d')} to {d_end.strftime('%Y-%m-%d')}")
            if status != "Success":
                print(f"   ⚠️ WARNING: Could not retrieve holdings. Reason: {status}")
            print("-" * 80)
            
            # REPORT: LONG ONLY
            print(f"   [LONG-ONLY STRATEGY]")
            print(f"   💰 Value: £{lo_val_e:,.0f}  |  📈 Return: {lo_ret:>+6.2%} (PnL: £{lo_pnl:+,.0f})")
            print(f"   🟢 BUYS:       {lo_buys}")
            print(f"   🔴 SELLS:      {lo_sells}")
            print(f"   📊 HOLDINGS:   {fmt(w_lo)}")
            
            print("-" * 40)
            
            # REPORT: LONG SHORT
            print(f"   [LONG-SHORT STRATEGY]")
            print(f"   💰 Value: £{ls_val_e:,.0f}  |  📈 Return: {ls_ret:>+6.2%} (PnL: £{ls_pnl:+,.0f})")
            print(f"   🟢 LONG ADD:   {ls_buys}")
            print(f"   🔴 LONG CUT:   {ls_sells}")
            print(f"   📉 SHORT ADD:  {ls_shorts}")
            print(f"   🔄 COVER:      {ls_covers}")
            print(f"   📊 ALLOCATION: {fmt(w_ls)}")

        print("\n✅ Performance Log Complete.")
    else:
        print("❌ Error: 'curve_lo' or 'curve_ls' not found. Run Block 6 first.")
================================================================================
                    TRADING JOURNAL: MONTHLY PERFORMANCE LOG                    
================================================================================
Generating Journal for last 4 months...

📅 PERIOD: 2025-09-30 to 2025-10-31
--------------------------------------------------------------------------------
   [LONG-ONLY STRATEGY]
   💰 Value: £36,777  |  📈 Return: +7.59% (PnL: £+2,596)
   🟢 BUYS:       APP, IBKR, STX, WDC, HOOD, FIX, HWM, DASH, TPR, PLTR
   🔴 SELLS:      -
   📊 HOLDINGS:   APP(+10.5%), HOOD(+10.5%), PLTR(+10.3%), FIX(+10.1%), TPR(+10.0%), STX(+9.8%), WDC(+9.8%), IBKR(+9.7%), HWM(+9.7%), DASH(+9.6%)
----------------------------------------
   [LONG-SHORT STRATEGY]
   💰 Value: £14,902  |  📈 Return: +4.71% (PnL: £+670)
   🟢 LONG ADD:   IBKR, STX, DASH, APP, WDC, HOOD, FIX, HWM, TPR, PLTR
   🔴 LONG CUT:   -
   📉 SHORT ADD:  STZ, BAX, ALGN, DECK, BF-B, IFF, ERIE, LYB, TTD, DOW
   🔄 COVER:      -
   📊 ALLOCATION: APP(+5.3%), HOOD(+5.3%), PLTR(+5.2%), TTD(-5.1%), FIX(+5.1%), ALGN(-5.1%), STZ(-5.1%), LYB(-5.1%), TPR(+5.1%), STX(+5.0%), WDC(+5.0%), IBKR(+4.9%), HWM(+4.9%), DASH(+4.9%), BF-B(-4.9%), ERIE(-4.9%), DOW(-4.9%), BAX(-4.8%), IFF(-4.8%), DECK(-4.7%)

📅 PERIOD: 2025-10-31 to 2025-11-30
--------------------------------------------------------------------------------
   [LONG-ONLY STRATEGY]
   💰 Value: £37,034  |  📈 Return: +0.70% (PnL: £+258)
   🟢 BUYS:       WBD, MU, GLW, AVGO, LRCX, APH
   🔴 SELLS:      IBKR, STX, DASH, APP, HOOD, TPR
   📊 HOLDINGS:   WBD(+10.5%), WDC(+10.4%), FIX(+10.4%), APH(+10.1%), MU(+10.0%), AVGO(+10.0%), PLTR(+9.8%), GLW(+9.6%), HWM(+9.6%), LRCX(+9.6%)
----------------------------------------
   [LONG-SHORT STRATEGY]
   💰 Value: £14,994  |  📈 Return: +0.61% (PnL: £+91)
   🟢 LONG ADD:   MU, LRCX, WBD, GLW, AVGO, APH
   🔴 LONG CUT:   IBKR, DASH, APP, HOOD, TPR, STX
   📉 SHORT ADD:  CMG, FISV, ARE, BLDR, CAG, SWK, MOH
   🔄 COVER:      STZ, ALGN, IFF, LYB, TTD, DOW, BF-B
   📊 ALLOCATION: WBD(+5.4%), WDC(+5.3%), FIX(+5.3%), MOH(-5.2%), FISV(-5.2%), DECK(-5.2%), APH(+5.2%), MU(+5.1%), BAX(-5.1%), AVGO(+5.1%), CMG(-5.1%), PLTR(+5.0%), GLW(+4.9%), HWM(+4.9%), LRCX(+4.9%), ARE(-4.9%), BLDR(-4.7%), CAG(-4.6%), ERIE(-4.5%), SWK(-4.5%)

📅 PERIOD: 2025-11-30 to 2025-12-31
--------------------------------------------------------------------------------
   [LONG-ONLY STRATEGY]
   💰 Value: £36,910  |  📈 Return: -0.33% (PnL: £-124)
   🟢 BUYS:       GOOGL, DLTR, NEM, IDXX, GOOG, RL, CAH
   🔴 SELLS:      MU, GLW, LRCX, FIX, HWM, APH, PLTR
   📊 HOLDINGS:   WDC(+11.1%), WBD(+10.5%), NEM(+10.1%), IDXX(+9.9%), RL(+9.9%), DLTR(+9.8%), GOOGL(+9.8%), GOOG(+9.7%), CAH(+9.7%), AVGO(+9.6%)
----------------------------------------
   [LONG-SHORT STRATEGY]
   💰 Value: £14,771  |  📈 Return: -1.49% (PnL: £-223)
   🟢 LONG ADD:   GOOGL, IDXX, RL, CAH, DLTR, NEM, GOOG
   🔴 LONG CUT:   MU, LRCX, HWM, GLW, FIX, APH, PLTR
   📉 SHORT ADD:  TTD, CHTR, TPL, ADBE, IT, UNH, MRNA
   🔄 COVER:      CMG, BAX, DECK, BLDR, CAG, ERIE, SWK
   📊 ALLOCATION: TTD(-5.6%), WDC(+5.5%), CHTR(-5.3%), WBD(+5.2%), IT(-5.2%), FISV(-5.2%), NEM(+5.0%), ARE(-5.0%), MRNA(-4.9%), IDXX(+4.9%), RL(+4.9%), DLTR(+4.9%), GOOGL(+4.9%), TPL(-4.8%), GOOG(+4.8%), UNH(-4.8%), CAH(+4.8%), ADBE(-4.8%), AVGO(+4.8%), MOH(-4.7%)

📅 PERIOD: 2025-12-31 to 2026-01-31
--------------------------------------------------------------------------------
   [LONG-ONLY STRATEGY]
   💰 Value: £42,360  |  📈 Return: +14.76% (PnL: £+5,449)
   🟢 BUYS:       HII, MU, DG, LRCX, C, TPR
   🔴 SELLS:      GOOGL, IDXX, GOOG, AVGO, RL, CAH
   📊 HOLDINGS:   MU(+10.7%), WBD(+10.5%), NEM(+10.3%), TPR(+10.2%), WDC(+10.0%), LRCX(+10.0%), HII(+9.7%), DLTR(+9.6%), DG(+9.6%), C(+9.5%)
----------------------------------------
   [LONG-SHORT STRATEGY]
   💰 Value: £15,558  |  📈 Return: +5.33% (PnL: £+787)
   🟢 LONG ADD:   HII, MU, LRCX, C, TPR, DG
   🔴 LONG CUT:   GOOGL, IDXX, RL, CAH, GOOG, AVGO
   📉 SHORT ADD:  CLX, PYPL, BLDR, LYB, POOL, BF-B, ZBRA, GDDY
   🔄 COVER:      FISV, CHTR, TPL, ADBE, IT, UNH, MRNA, MOH
   📊 ALLOCATION: MU(+5.5%), WBD(+5.4%), NEM(+5.3%), TPR(+5.2%), ARE(-5.2%), WDC(+5.1%), TTD(-5.1%), LRCX(+5.1%), BF-B(-5.0%), HII(+5.0%), BLDR(-4.9%), DLTR(+4.9%), LYB(-4.9%), DG(+4.9%), C(+4.8%), PYPL(-4.8%), POOL(-4.8%), ZBRA(-4.8%), CLX(-4.7%), GDDY(-4.7%)

✅ Performance Log Complete.

------- BETA-HEDGED WALK-FORWARD (Market Neutral) -----------

This block implements a Beta-Neutral Momentum Strategy.¶

Unlike the previous "Dollar Neutral" (50% Long / 50% Short) approach, this strategy calculates the Beta (volatility relative to the market) of your Longs vs. your Shorts.

Scenario: If your Longs are very volatile (High Beta) and your Shorts are stable (Low Beta), a 50/50 portfolio is actually Net Long risk.

The Fix: This algorithm calculates a Hedge Ratio (e.g., 1.5x) and increases the size of the Short position to mathematically neutralize market risk.

--- CONFIGURATION ---¶

LOOKBACK_BETA = 126 # 6 Months for Beta Calculation LOOKBACK_MOM = 252 # 12 Months for Trend REBALANCE_FREQ = 'ME' # Monthly NUM_POSITIONS = 10 # Top 10 Winners / Bottom 10 Losers TX_COST = 0.001 # 0.10% per trade RF_RATE = last_rf # last Risk-Free Rate

In [10]:
# --- BLOCK 8: BETA-HEDGED MOMENTUM (SILENT & OPTIMIZED) ---
try:
    from tqdm import tqdm
    tqdm_available = True
except ImportError:
    tqdm_available = False

print("\n" + "="*80)
print(f"{'STRATEGY: BETA-HEDGED MARKET NEUTRAL (OPTIMIZED)':^80}")
print("="*80)

# --- CONFIGURATION ---
LOOKBACK_BETA   = 126       # 6 Months for Beta Calculation
LOOKBACK_MOM    = 252       # 12 Months for Trend
REBALANCE_FREQ  = 'ME'      # Monthly
NUM_POSITIONS   = 10        # Top 10 Winners / Bottom 10 Losers
TX_COST         = 0.001     # 0.10% per trade
RF_RATE         = last_rf      # last Risk-Free Rate

# 1. VECTORIZED PRE-CALCULATION
if 'prices_stocks' in locals():
    print("... Pre-calculating Momentum and Beta Factors ...")
    
    # A. Global Returns Matrix
    all_returns = prices_stocks.pct_change(fill_method=None).fillna(0)
    
    # B. Synthetic Market Return (Equal Weight Index of your Universe)
    market_ret_series = all_returns.mean(axis=1)
    
    # C. Rolling Beta (Vectorized)
    # Beta = Cov(Stock, Market) / Var(Market)
    rolling_cov = all_returns.rolling(window=LOOKBACK_BETA).cov(market_ret_series)
    rolling_var = market_ret_series.rolling(window=LOOKBACK_BETA).var()
    rolling_beta = rolling_cov.div(rolling_var, axis=0).fillna(1.0)
    
    # D. Rolling Momentum (Pure Price Momentum)
    # Standard Asness: t-21 to t-252
    p_lag_1m = prices_stocks.shift(21)
    p_lag_1y = prices_stocks.shift(252 + 21)
    factor_mom = (p_lag_1m / p_lag_1y) - 1

else:
    print("❌ Critical Error: 'prices_stocks' not found.")
    rolling_beta, factor_mom = pd.DataFrame(), pd.DataFrame()

# 2. SIMULATION ENGINE
def run_beta_hedged_fast():
    # --- DATE SETUP (Robust Finder) ---
    # Finds the last valid trading day for every month
    date_tracker = pd.Series(prices_stocks.index, index=prices_stocks.index)
    rebalance_dates = date_tracker.resample(REBALANCE_FREQ).max().dropna()
    
    # Dynamic Start Date: Start 300 days after first data point to allow buffers to fill
    min_start_date = prices_stocks.index[0] + pd.Timedelta(days=300)
    active_rebal_dates = rebalance_dates[rebalance_dates > min_start_date]
    
    if len(active_rebal_dates) < 2:
        print("❌ Error: Not enough data points to run backtest.")
        return pd.Series(dtype=float), pd.Series(dtype=float)

    # Init Tracking
    equity_curve = [10000.0]
    dates = [active_rebal_dates.iloc[0]] 
    hedge_ratios_log = []
    prev_weights = pd.Series(dtype=float)
    
    print(f"   > Simulating {len(active_rebal_dates)} periods...")
    iterator = tqdm(range(len(active_rebal_dates) - 1)) if tqdm_available else range(len(active_rebal_dates) - 1)

    for i in iterator:
        curr_date = active_rebal_dates.iloc[i]
        next_date = active_rebal_dates.iloc[i+1]
        
        # A. LOOKUP MOMENTUM
        if curr_date not in factor_mom.index: continue
        mom_row = factor_mom.loc[curr_date]
        
        valid_mom = mom_row.dropna().sort_values(ascending=False)
        if len(valid_mom) < NUM_POSITIONS * 2: continue
            
        longs = valid_mom.head(NUM_POSITIONS).index
        shorts = valid_mom.tail(NUM_POSITIONS).index
        
        # B. LOOKUP BETA
        if curr_date in rolling_beta.index:
            beta_row = rolling_beta.loc[curr_date]
            beta_long = beta_row[longs].mean()
            beta_short = beta_row[shorts].mean()
        else:
            beta_long, beta_short = 1.0, 1.0
        
        # C. CALC HEDGE RATIO
        # Ratio = Long Beta / Short Beta
        if abs(beta_short) < 0.1: beta_short = 0.1 # Prevent division by zero
        raw_ratio = beta_long / beta_short
        
        # Safety Clip: Never leverage shorts more than 2.5x
        hedge_ratio = np.clip(raw_ratio, 0.5, 2.5)
        hedge_ratios_log.append(hedge_ratio)
        
        # D. WEIGHTING
        weights = pd.Series(0.0, index=list(longs) + list(shorts))
        weights[longs] = 1.0 / len(longs)                # 100% Long
        weights[shorts] = -1.0 * hedge_ratio / len(shorts) # Variable Short
        
        # E. CALCULATE RETURNS
        mask = (all_returns.index > curr_date) & (all_returns.index <= next_date)
        period_rets = all_returns.loc[mask]
        
        if period_rets.empty: continue
            
        w_aligned = weights.reindex(period_rets.columns).fillna(0)
        port_daily_ret = period_rets.dot(w_aligned)
        
        # Compound growth for the month
        period_growth = (1 + port_daily_ret).cumprod().iloc[-1]
        
        # Add Cash Interest (You earn interest on the cash buffer)
        rf_monthly = (1 + RF_RATE)**(1/12) - 1
        
        # Transaction Costs
        w_old_aligned = prev_weights.reindex(weights.index).fillna(0)
        turnover = np.sum(np.abs(weights - w_old_aligned))
        cost = turnover * TX_COST
        
        new_val = equity_curve[-1] * (1 - cost) * period_growth * (1 + rf_monthly)
        
        equity_curve.append(new_val)
        dates.append(next_date)
        prev_weights = weights

    return pd.Series(equity_curve, index=dates), pd.Series(hedge_ratios_log, index=dates[:-1])

# --- 3. EXECUTION ---
if 'prices_stocks' in locals():
    curve_beta, ratios_beta = run_beta_hedged_fast()

    if not curve_beta.empty:
        # --- 4. VISUALIZATION ---
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), gridspec_kw={'height_ratios': [3, 1]})

        ax1.plot(curve_beta, label='Beta-Hedged Strategy', color='#00897b', linewidth=2)
        ax1.set_title(f"Beta-Hedged Performance (Top {NUM_POSITIONS} L/S)", fontsize=14, fontweight='bold')
        ax1.set_ylabel("Portfolio Value (£)")
        ax1.legend(loc='upper left')
        ax1.grid(True, alpha=0.2)

        ax2.plot(ratios_beta, color='#ff7043', label='Hedge Ratio', linewidth=1.5)
        ax2.axhline(1.0, color='black', linestyle='--', alpha=0.5, label='Dollar Neutral (1.0)')
        ax2.set_title("Dynamic Hedge Ratio (How heavy is the Short?)", fontsize=12, fontweight='bold')
        ax2.set_ylabel("Short Multiplier")
        ax2.legend()
        ax2.grid(True, alpha=0.2)
        
        plt.tight_layout()
        plt.show()

        # --- 5. METRICS ---
        days = (curve_beta.index[-1] - curve_beta.index[0]).days
        years = days / 365.25
        
        total_ret = (curve_beta.iloc[-1] / curve_beta.iloc[0]) - 1
        cagr = (1 + total_ret)**(1/years) - 1 if years > 0 else 0
        
        dd = (curve_beta / curve_beta.cummax()) - 1
        max_dd = dd.min()
        
        vol = curve_beta.pct_change(fill_method=None).std() * np.sqrt(252)
        sharpe = (cagr - RF_RATE) / vol if vol > 0 else 0
        
        print(f"\n{'METRIC':<20} | {'RESULT':<10}")
        print("-" * 35)
        print(f"{'Total Return':<20} | {total_ret:<10.2%}")
        print(f"{'CAGR (Annual)':<20} | {cagr:<10.2%}")
        print(f"{'Max Drawdown':<20} | {max_dd:<10.2%}")
        print(f"{'Sharpe Ratio':<20} | {sharpe:<10.2f}")
        print(f"{'Avg Hedge Ratio':<20} | {ratios_beta.mean():<10.2f}x")
    else:
        print("⚠️ Backtest failed to produce data.")
else:
    print("❌ Critical Error: 'prices_stocks' not found.")
================================================================================
                STRATEGY: BETA-HEDGED MARKET NEUTRAL (OPTIMIZED)                
================================================================================
... Pre-calculating Momentum and Beta Factors ...
   > Simulating 64 periods...
100%|█████████████████████████████████████████████████████████████████████████████████| 63/63 [00:00<00:00, 202.66it/s]
No description has been provided for this image
METRIC               | RESULT    
-----------------------------------
Total Return         | 24.45%    
CAGR (Annual)        | 4.27%     
Max Drawdown         | -50.50%   
Sharpe Ratio         | 0.03      
Avg Hedge Ratio      | 1.19      x
In [11]:
# --- BLOCK 9: BETA-HEDGED TRADING JOURNAL (STRICT BLOCK 8 LINK) ---
print("\n" + "="*80)
print(f"{'TRADING JOURNAL: BETA-HEDGED STRATEGY LOG':^80}")
print("="*80)

# 1. VALIDATION: ENSURE BLOCK 8 RAN
# We strictly check for Block 8 outputs to ensure consistency
required_vars = ['curve_beta', 'ratios_beta', 'factor_mom', 'NUM_POSITIONS']
if not all(var in globals() for var in required_vars):
    print("❌ Critical Error: Data from Block 8 is missing.")
    print("   Please run Block 8 (Optimized) immediately before this.")
else:
    # 2. CONFIGURATION
    display_months = 6   # Number of periods to display
    
    # Get Dates directly from the Block 8 Equity Curve
    # This guarantees the dates match the simulation exactly.
    rebal_dates = curve_beta.index
    report_dates = rebal_dates[-(display_months+1):]

    # Tracking for "New Entry/Exit" logic
    prev_weights = pd.Series(dtype=float)

    # --- HELPER: FORMAT HOLDINGS ---
    def fmt_weights(weights):
        if weights is None or weights.empty: return "Cash"
        # Only show positions with > 0.1% allocation to reduce noise
        active = weights[weights.abs() > 0.001].copy()
        if active.empty: return "Cash"
        # Sort: Longs (Positive) descending, then Shorts (Negative) ascending
        sorted_w = active.sort_values(ascending=False)
        return ", ".join([f"{t}({w:+.1%})" for t, w in sorted_w.items()])

    # --- HELPER: CLASSIFY TRADES (ENTRIES VS EXITS) ---
    def classify_trades(curr, prev):
        curr = curr if curr is not None else pd.Series(dtype=float)
        prev = prev if prev is not None else pd.Series(dtype=float)
        all_tickers = list(set(curr.index) | set(prev.index))
        l_in, l_out, s_in, s_out = [], [], [], []
        
        for t in all_tickers:
            w_new, w_old = curr.get(t, 0.0), prev.get(t, 0.0)
            
            # Long Side Changes
            if w_old <= 0.001 and w_new > 0.001: l_in.append(t)
            elif w_old > 0.001 and w_new <= 0.001: l_out.append(t)
            
            # Short Side Changes
            if w_old >= -0.001 and w_new < -0.001: s_in.append(t)
            elif w_old < -0.001 and w_new >= -0.001: s_out.append(t)

        def clean(l): return ", ".join(l) if l else "-"
        return clean(l_in), clean(l_out), clean(s_in), clean(s_out)

    # 3. THE REPORTING LOOP
    print(f"Generating Journal for the last {len(report_dates)-1} periods...")
    print(f"(Linking to Block 8 results for strict accuracy)")

    for i in range(len(report_dates) - 1):
        curr_date = report_dates[i]
        next_date = report_dates[i+1]
        
        # A. RETRIEVE EXACT PERFORMANCE
        # We look up the Portfolio Value directly from the Block 8 Equity Curve
        try:
            val_start = curve_beta.loc[curr_date]
            val_end   = curve_beta.loc[next_date]
            pnl       = val_end - val_start
            pct_ret   = pnl / val_start if val_start != 0 else 0.0
            
            # Retrieve the EXACT Hedge Ratio used in Block 8
            # .asof() finds the ratio that was active on or before the current date
            hedge_ratio = ratios_beta.asof(curr_date)
            if pd.isna(hedge_ratio): hedge_ratio = 1.0
        except KeyError:
            continue
            
        # B. RECONSTRUCT PORTFOLIO WEIGHTS
        # We assume the same logic as Block 8: Top N Longs, Bottom N Shorts
        # This reconstruction ensures the "Holdings" list matches the "Return"
        curr_weights = pd.Series(dtype=float)
        
        if curr_date in factor_mom.index:
            mom_row = factor_mom.loc[curr_date]
            valid_mom = mom_row.dropna().sort_values(ascending=False)
            
            if len(valid_mom) >= NUM_POSITIONS * 2:
                longs = valid_mom.head(NUM_POSITIONS).index
                shorts = valid_mom.tail(NUM_POSITIONS).index
                
                # Apply Block 8 Weighting Logic:
                # Longs get 100% capital (split N ways)
                # Shorts get -100% * HedgeRatio (split N ways)
                curr_weights = pd.Series(0.0, index=list(longs) + list(shorts))
                curr_weights[longs] = 1.0 / len(longs)
                curr_weights[shorts] = -1.0 * hedge_ratio / len(shorts)

        # C. GENERATE LOG OUTPUT
        l_in, l_out, s_in, s_out = classify_trades(curr_weights, prev_weights)

        print(f"\n📅 PERIOD: {curr_date.strftime('%Y-%m-%d')} to {next_date.strftime('%Y-%m-%d')}")
        print("-" * 80)
        print(f"   💰 Value: £{val_end:,.0f}  |  📈 Month Return: {pct_ret:>+6.2%} (PnL: £{pnl:+,.0f})")
        print(f"   ⚖️  HEDGE RATIO: {hedge_ratio:.2f}x (Short Multiplier)")
        print("-" * 40)
        print(f"   🟢 LONG ENTRY:   {l_in}")
        print(f"   🔴 LONG EXIT:    {l_out}")
        print(f"   📉 SHORT ENTRY:  {s_in}")
        print(f"   🔄 SHORT COVER:  {s_out}")
        print(f"   📊 PORTFOLIO:    {fmt_weights(curr_weights)}")
        
        # Exposure Check
        net_exp = curr_weights.sum()
        gross_exp = curr_weights.abs().sum()
        print(f"      (Net Exposure: {net_exp:.1%} | Gross: {gross_exp:.1%})")
        
        prev_weights = curr_weights

    print("\n✅ Beta-Hedged Journal Complete.")
================================================================================
                   TRADING JOURNAL: BETA-HEDGED STRATEGY LOG                    
================================================================================
Generating Journal for the last 6 periods...
(Linking to Block 8 results for strict accuracy)

📅 PERIOD: 2025-07-31 to 2025-08-29
--------------------------------------------------------------------------------
   💰 Value: £11,057  |  📈 Month Return: -4.01% (PnL: £-462)
   ⚖️  HEDGE RATIO: 1.61x (Short Multiplier)
----------------------------------------
   🟢 LONG ENTRY:   DASH, PLTR, CVNA, NRG, APP, VST, HOOD, HWM, TPR, AXON
   🔴 LONG EXIT:    -
   📉 SHORT ENTRY:  HAL, TER, BIIB, ARE, UNH, REGN, CNC, MRNA, SMCI, DOW
   🔄 SHORT COVER:  -
   📊 PORTFOLIO:    PLTR(+10.0%), HOOD(+10.0%), APP(+10.0%), AXON(+10.0%), CVNA(+10.0%), DASH(+10.0%), HWM(+10.0%), VST(+10.0%), NRG(+10.0%), TPR(+10.0%), ARE(-16.1%), HAL(-16.1%), UNH(-16.1%), BIIB(-16.1%), TER(-16.1%), DOW(-16.1%), SMCI(-16.1%), CNC(-16.1%), REGN(-16.1%), MRNA(-16.1%)
      (Net Exposure: -60.8% | Gross: 260.8%)

📅 PERIOD: 2025-08-29 to 2025-09-30
--------------------------------------------------------------------------------
   💰 Value: £11,463  |  📈 Month Return: +3.67% (PnL: £+406)
   ⚖️  HEDGE RATIO: 2.39x (Short Multiplier)
----------------------------------------
   🟢 LONG ENTRY:   IBKR, UAL, JBL, FIX
   🔴 LONG EXIT:    DASH, NRG, HWM, AXON
   📉 SHORT ENTRY:  ALGN, CAG, ELV, MOH
   🔄 SHORT COVER:  HAL, TER, ARE, SMCI
   📊 PORTFOLIO:    HOOD(+10.0%), PLTR(+10.0%), APP(+10.0%), TPR(+10.0%), CVNA(+10.0%), VST(+10.0%), IBKR(+10.0%), FIX(+10.0%), UAL(+10.0%), JBL(+10.0%), CAG(-23.9%), BIIB(-23.9%), ALGN(-23.9%), ELV(-23.9%), DOW(-23.9%), REGN(-23.9%), MOH(-23.9%), UNH(-23.9%), MRNA(-23.9%), CNC(-23.9%)
      (Net Exposure: -138.7% | Gross: 338.7%)

📅 PERIOD: 2025-09-30 to 2025-10-31
--------------------------------------------------------------------------------
   💰 Value: £11,085  |  📈 Month Return: -3.29% (PnL: £-378)
   ⚖️  HEDGE RATIO: 1.64x (Short Multiplier)
----------------------------------------
   🟢 LONG ENTRY:   RCL
   🔴 LONG EXIT:    JBL
   📉 SHORT ENTRY:  TTD, ADBE, IT
   🔄 SHORT COVER:  BIIB, ALGN, CAG
   📊 PORTFOLIO:    HOOD(+10.0%), APP(+10.0%), PLTR(+10.0%), CVNA(+10.0%), TPR(+10.0%), VST(+10.0%), RCL(+10.0%), UAL(+10.0%), FIX(+10.0%), IBKR(+10.0%), ADBE(-16.4%), ELV(-16.4%), UNH(-16.4%), DOW(-16.4%), TTD(-16.4%), MOH(-16.4%), REGN(-16.4%), IT(-16.4%), CNC(-16.4%), MRNA(-16.4%)
      (Net Exposure: -63.8% | Gross: 263.8%)

📅 PERIOD: 2025-10-31 to 2025-11-28
--------------------------------------------------------------------------------
   💰 Value: £11,097  |  📈 Month Return: +0.11% (PnL: £+12)
   ⚖️  HEDGE RATIO: 0.75x (Short Multiplier)
----------------------------------------
   🟢 LONG ENTRY:   WBD, STX, WDC, TSLA
   🔴 LONG EXIT:    IBKR, UAL, RCL, VST
   📉 SHORT ENTRY:  STZ, ALGN, LYB, TGT, BF-B
   🔄 SHORT COVER:  REGN, ADBE, UNH, ELV, MOH
   📊 PORTFOLIO:    HOOD(+10.0%), APP(+10.0%), PLTR(+10.0%), TPR(+10.0%), WDC(+10.0%), WBD(+10.0%), STX(+10.0%), CVNA(+10.0%), FIX(+10.0%), TSLA(+10.0%), STZ(-7.5%), ALGN(-7.5%), BF-B(-7.5%), LYB(-7.5%), TGT(-7.5%), CNC(-7.5%), MRNA(-7.5%), IT(-7.5%), DOW(-7.5%), TTD(-7.5%)
      (Net Exposure: 24.6% | Gross: 175.4%)

📅 PERIOD: 2025-11-28 to 2025-12-31
--------------------------------------------------------------------------------
   💰 Value: £10,873  |  📈 Month Return: -2.02% (PnL: £-224)
   ⚖️  HEDGE RATIO: 0.77x (Short Multiplier)
----------------------------------------
   🟢 LONG ENTRY:   MU, LRCX, AVGO, SMCI
   🔴 LONG EXIT:    APP, CVNA, FIX, TSLA
   📉 SHORT ENTRY:  CMG, FISV, DECK, ARE, LULU, FDS, MOH
   🔄 SHORT COVER:  ALGN, LYB, DOW, TGT, BF-B, CNC, MRNA
   📊 PORTFOLIO:    HOOD(+10.0%), PLTR(+10.0%), WDC(+10.0%), STX(+10.0%), WBD(+10.0%), SMCI(+10.0%), TPR(+10.0%), MU(+10.0%), AVGO(+10.0%), LRCX(+10.0%), FDS(-7.7%), STZ(-7.7%), ARE(-7.7%), CMG(-7.7%), LULU(-7.7%), MOH(-7.7%), DECK(-7.7%), IT(-7.7%), TTD(-7.7%), FISV(-7.7%)
      (Net Exposure: 23.0% | Gross: 177.0%)

📅 PERIOD: 2025-12-31 to 2026-01-21
--------------------------------------------------------------------------------
   💰 Value: £12,445  |  📈 Month Return: +14.45% (PnL: £+1,572)
   ⚖️  HEDGE RATIO: 0.83x (Short Multiplier)
----------------------------------------
   🟢 LONG ENTRY:   INTC, NEM, FIX
   🔴 LONG EXIT:    TPR, AVGO, SMCI
   📉 SHORT ENTRY:  CHTR, MRNA
   🔄 SHORT COVER:  STZ, FDS
   📊 PORTFOLIO:    WDC(+10.0%), HOOD(+10.0%), STX(+10.0%), MU(+10.0%), NEM(+10.0%), PLTR(+10.0%), INTC(+10.0%), LRCX(+10.0%), WBD(+10.0%), FIX(+10.0%), MRNA(-8.3%), ARE(-8.3%), CMG(-8.3%), CHTR(-8.3%), MOH(-8.3%), LULU(-8.3%), IT(-8.3%), DECK(-8.3%), FISV(-8.3%), TTD(-8.3%)
      (Net Exposure: 16.8% | Gross: 183.2%)

✅ Beta-Hedged Journal Complete.
In [12]:
# --- BLOCK 10: STRATEGY SHOWDOWN (LINKED & OPTIMIZED) ---
try:
    from tqdm import tqdm
    tqdm_available = True
except ImportError:
    tqdm_available = False

print("\n" + "="*95)
print(f"{'HEAD-TO-HEAD: BETA HEDGED vs DOLLAR NEUTRAL vs S&P 500':^95}")
print("="*95)

# 1. VALIDATION: REQUIRE BLOCK 8 RESULTS
# We strictly use the output from Block 8 to ensure the "Beta Hedged" curve is identical.
required_vars = ['curve_beta', 'factor_mom', 'prices_stocks', 'NUM_POSITIONS', 'TX_COST', 'RF_RATE']

if not all(var in globals() for var in required_vars):
    print("❌ Critical Error: Results from Block 8 are missing.")
    print("   Please run Block 8 (Optimized) first.")
else:
    # 2. RETRIEVE BETA HEDGED CURVE
    curve_beta_h = curve_beta.copy()
    simulation_dates = curve_beta.index
    print(f"✅ Linked to Block 8: Using existing Beta-Hedged curve ({len(simulation_dates)} periods).")

    # 3. SIMULATE DOLLAR NEUTRAL (ON SAME DATES)
    # We run the Dollar Neutral strategy (Hedge=1.0) on the EXACT same dates as Block 8.
    def run_dollar_neutral_fast(target_dates):
        # Pre-compute returns
        all_returns = prices_stocks.pct_change(fill_method=None).fillna(0)
        
        equity_curve = [10000.0]
        # FIX: Access first date by position to avoid index lookup issues
        dates = [target_dates[0]]
        prev_weights = pd.Series(dtype=float)
        
        print(f"   > Simulating [DOLLAR_NEUTRAL] to match...")
        iterator = tqdm(range(len(target_dates) - 1)) if tqdm_available else range(len(target_dates) - 1)
        
        for i in iterator:
            curr_date = target_dates[i]
            next_date = target_dates[i+1]
            
            # A. SELECTION (Same as Block 8)
            if curr_date not in factor_mom.index: 
                # Fallback: if data missing, hold cash to match Block 8 behavior
                equity_curve.append(equity_curve[-1])
                dates.append(next_date)
                continue
                
            mom_row = factor_mom.loc[curr_date]
            valid_mom = mom_row.dropna().sort_values(ascending=False)
            
            if len(valid_mom) < NUM_POSITIONS * 2: 
                equity_curve.append(equity_curve[-1])
                dates.append(next_date)
                continue
                
            longs = valid_mom.head(NUM_POSITIONS).index
            shorts = valid_mom.tail(NUM_POSITIONS).index
            
            # B. HEDGE RATIO (STATIC)
            # Dollar Neutral = 1.0 (Equal capital Long and Short)
            hedge_ratio = 1.0 

            # C. WEIGHTING
            weights = pd.Series(0.0, index=list(longs) + list(shorts))
            weights[longs] = 1.0 / len(longs)
            weights[shorts] = -1.0 * hedge_ratio / len(shorts)
            
            # D. RETURNS & COSTS
            mask = (all_returns.index > curr_date) & (all_returns.index <= next_date)
            period_rets = all_returns.loc[mask]
            
            if period_rets.empty: 
                equity_curve.append(equity_curve[-1])
                dates.append(next_date)
                continue
            
            w_aligned = weights.reindex(period_rets.columns).fillna(0)
            port_ret = period_rets.dot(w_aligned)
            period_growth = (1 + port_ret).cumprod().iloc[-1]
            
            rf_monthly = (1 + RF_RATE)**(1/12) - 1
            w_old_aligned = prev_weights.reindex(weights.index).fillna(0)
            turnover = np.sum(np.abs(weights - w_old_aligned))
            cost = turnover * TX_COST
            
            new_val = equity_curve[-1] * (1 - cost) * period_growth * (1 + rf_monthly)
            
            equity_curve.append(new_val)
            dates.append(next_date)
            prev_weights = weights
            
        return pd.Series(equity_curve, index=dates)

    # Run Dollar Neutral Comparison
    curve_dollar = run_dollar_neutral_fast(simulation_dates)

    # 4. BENCHMARK RETRIEVAL (ROBUST)
    if not curve_beta_h.empty:
        s_date, e_date = curve_beta_h.index[0], curve_beta_h.index[-1]
        print(f"   > Retrieving Benchmark (S&P 500 GBP)...")
        try:
            bench_df = yf.download(["^GSPC", "GBPUSD=X"], start=s_date, end=e_date, progress=False, auto_adjust=True)
            
            if isinstance(bench_df.columns, pd.MultiIndex):
                bench_df = bench_df.xs('Close', axis=1, level=0)
            elif 'Close' in bench_df.columns:
                bench_df = bench_df['Close']
            
            # Fill & Clean
            bench_df = bench_df.ffill().bfill()
            
            # Price Conversion
            spx_gbp_daily = bench_df["^GSPC"] / bench_df["GBPUSD=X"]
            
            # Reindex to Strategy Dates
            bench_aligned = spx_gbp_daily.reindex(curve_beta_h.index, method='ffill')
            bench_aligned = bench_aligned.ffill().bfill()
            
            # Normalize to 10k
            curve_bench = bench_aligned / bench_aligned.iloc[0] * 10000
            
        except Exception as e:
            print(f"   ⚠️ Benchmark failed ({e}). Using Flat Line.")
            curve_bench = pd.Series(10000, index=curve_beta_h.index)

    # 5. VISUALIZATION
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12), gridspec_kw={'height_ratios': [2, 1]})

    ax1.plot(curve_beta_h, label='Beta Hedged (Block 8)', color='#00897b', linewidth=2.5)
    ax1.plot(curve_dollar, label='Dollar Neutral (Fixed 1.0)', color='gray', linestyle='--', linewidth=1.5)
    ax1.plot(curve_bench, label='S&P 500 (GBP)', color='black', alpha=0.6, linewidth=1)
    
    ax1.set_title("Strategy Showdown: Hedged vs Benchmark", fontsize=14, fontweight='bold')
    ax1.set_ylabel("Portfolio Value (£)")
    ax1.legend(loc='upper left')
    ax1.grid(True, alpha=0.15)

    dd_beta = (curve_beta_h - curve_beta_h.cummax()) / curve_beta_h.cummax()
    dd_bench = (curve_bench - curve_bench.cummax()) / curve_bench.cummax()

    ax2.plot(dd_beta, label='Beta Hedged DD', color='#00897b', linewidth=1)
    ax2.fill_between(dd_bench.index, dd_bench, 0, color='gray', alpha=0.2, label='Benchmark DD')
    
    ax2.set_title("Risk Profile: Strategy vs Market Drawdown", fontsize=12, fontweight='bold')
    ax2.set_ylabel("Drawdown %")
    ax2.legend()
    ax2.grid(True, alpha=0.15)

    plt.tight_layout()
    plt.show()

    # 6. METRICS TABLE
    def get_metrics(curve):
        if len(curve) < 2 or curve.isnull().all(): return [0,0,0,0,0]
        
        tot = (curve.iloc[-1] / curve.iloc[0]) - 1
        days = (curve.index[-1] - curve.index[0]).days
        years = days / 365.25
        cagr = (curve.iloc[-1] / curve.iloc[0]) ** (1/years) - 1 if years > 0 else 0
        
        monthly_rets = curve.pct_change(fill_method=None).dropna()
        vol = monthly_rets.std() * np.sqrt(12)
        
        sharpe = (cagr - RF_RATE) / vol if vol > 0 else 0
        dd = (curve - curve.cummax()) / curve.cummax()
        return tot, cagr, vol, sharpe, dd.min()

    m_beta = get_metrics(curve_beta_h)
    m_doll = get_metrics(curve_dollar)
    m_bnch = get_metrics(curve_bench)

    print(f"\n{'METRIC':<18} | {'BETA HEDGED':<15} | {'DOLLAR NEUTRAL':<15} | {'BENCHMARK':<15}")
    print("-" * 80)
    
    labels = ['Total Return', 'CAGR', 'Volatility', 'Sharpe Ratio', 'Max Drawdown']
    fmt = ['.2%', '.2%', '.2%', '.2f', '.2%']
    
    for i, lbl in enumerate(labels):
        print(f"{lbl:<18} | {m_beta[i]:<15{fmt[i]}} | {m_doll[i]:<15{fmt[i]}} | {m_bnch[i]:<15{fmt[i]}}")
    print("-" * 80)

    # Determine Winner
    scores = {'Beta Hedged': m_beta[3], 'Dollar Neutral': m_doll[3], 'Benchmark': m_bnch[3]}
    winner = max(scores, key=scores.get)
    
    print(f"\n🏆 FINAL VERDICT: {winner} (Highest Sharpe Ratio)")
===============================================================================================
                    HEAD-TO-HEAD: BETA HEDGED vs DOLLAR NEUTRAL vs S&P 500                     
===============================================================================================
✅ Linked to Block 8: Using existing Beta-Hedged curve (61 periods).
   > Simulating [DOLLAR_NEUTRAL] to match...
100%|█████████████████████████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 223.23it/s]
   > Retrieving Benchmark (S&P 500 GBP)...
No description has been provided for this image
METRIC             | BETA HEDGED     | DOLLAR NEUTRAL  | BENCHMARK      
--------------------------------------------------------------------------------
Total Return       | 24.45%          | 93.40%          | 100.32%        
CAGR               | 4.27%           | 13.45%          | 14.22%         
Volatility         | 36.65%          | 38.39%          | 12.97%         
Sharpe Ratio       | 0.12            | 0.35            | 1.09           
Max Drawdown       | -50.50%         | -47.56%         | -14.59%        
--------------------------------------------------------------------------------

🏆 FINAL VERDICT: Benchmark (Highest Sharpe Ratio)

1. The "Crystal Ball" Effect¶

The portfolio so far can be seen as The Honest Backtests: These simulations "walked forward" through time. In 2021, they did not know which stocks would crash in 2022. They had to pick from the entire universe each time the top 10 winners and the top 10 losers. That is why those results looked realistic (and sometimes messy).

2. The mathematically "perfect" portfolios¶

The following portfolio will use Optimization: These blocks started with the final_longs list—the "Survivors" that passed your strict filters TODAY.

You effectively asked: "If I had picked the 10 best stocks of 2025 back in 2024, how would I have done?"

The answer is obviously "Amazing," because you already removed every stock that failed along the way.

3. So, why do we want this optimised portfolios??¶

If the returns are biased, is the code useless? No.

We use these blocks for Risk Estimation, not Return Prediction.

Useless Metric: The "Expected Return" (e.g., 80% annualized). You will likely not get this next month. The momentum will fade.

Useful Metric: The Covariance & Correlation (The "Safe" vs "Aggressive" split).

Even though the returns are biased, the way these stocks move together (correlation) is usually stable.

If we see that Stock A and Stock B move in opposite directions, you should hold them both, so you lower your risk.

CREATING TWO LONG ONLY PORTFOLIOS¶

USING MAX SHARPE OPTIMISATION & INVERSE VOLATILITY WEIGHTS

--- CONFIGURATION ---¶

LOOKBACK_DAYS = 252 # 1 Year Data for Covariance MAX_STOCKS = 10 # Max Assets to Optimize RF_RATE = last_rf # last Risk Free Rate NUM_SIMULATIONS = 2500 # Monte Carlo Density

In [13]:
# --- BLOCK 11: OPTIMIZATION & EFFICIENT FRONTIER VISUALIZATION ---
print("\n" + "="*80)
print(f"{'PORTFOLIO OPTIMIZATION: MAX SHARPE vs INVERSE VOLATILITY':^80}")
print("="*80)

# --- CONFIGURATION ---
LOOKBACK_DAYS   = 252       # 1 Year Data for Covariance
MAX_STOCKS      = 10        # Max Assets to Optimize
RF_RATE         = last_rf      # Risk Free Rate (Default 4%)
NUM_SIMULATIONS = 2500      # Monte Carlo Density

# 1. VALIDATION & RETRIEVAL (Bridge to Block 4 OR Block 8)
survivors = []
source_label = "None"

if 'final_longs' in locals() and len(final_longs) > 0:
    survivors = final_longs
    source_label = "Block 4 (Refined Longs)"
elif 'top_longs' in locals() and len(top_longs) > 0:
    survivors = top_longs
    source_label = "Block 8 (Forecast Longs)"
else:
    print("❌ Error: No tickers found. Please run Block 8 first.")

if survivors:
    print(f"📥 Input Source: {source_label} ({len(survivors)} Tickers)")

    # 2. SMART SELECTION (Capping at Top N)
    # We prioritize tickers with the highest Momentum Score if available
    if len(survivors) > MAX_STOCKS:
        if 'final_score' in locals():
             # Sort by Block 8 Score
            survivors = sorted(survivors, key=lambda t: final_score.get(t, -999), reverse=True)[:MAX_STOCKS]
        elif 'x_data' in locals():
            # Sort by Block 4 Score
            survivors = sorted(survivors, key=lambda t: x_data.get(t, -999), reverse=True)[:MAX_STOCKS]
        else:
            survivors = survivors[:MAX_STOCKS]
        print(f"✂️  Capping at Top {MAX_STOCKS} (Best Momentum Score).")
            
    print(f"💎 Optimization Universe: {', '.join(survivors)}")
    
    # 3. DATA PREPARATION
    # Get recent history for covariance matrix
    subset_prices = prices_stocks[survivors].iloc[-LOOKBACK_DAYS:]
    subset_rets = subset_prices.pct_change(fill_method=None).fillna(0)
    
    mu = subset_rets.mean() * 252
    sigma = subset_rets.cov() * 252
    vol_individual = subset_rets.std() * np.sqrt(252)
    n_assets = len(survivors)

    # --- ENGINE 1: MAX SHARPE RATIO ---
    def neg_sharpe(weights):
        p_ret = np.sum(mu * weights)
        p_vol = np.sqrt(np.dot(weights.T, np.dot(sigma, weights)))
        if p_vol == 0: return 0
        return - (p_ret - RF_RATE) / p_vol

    # Constraints: Fully Invested (Sum=1), Long Only (0-1)
    constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    bounds = tuple((0.0, 1.0) for _ in range(n_assets))
    init_guess = [1/n_assets] * n_assets
    
    try:
        opt_s = sco.minimize(neg_sharpe, init_guess, method='SLSQP', bounds=bounds, constraints=constraints)
        w_sharpe = opt_s.x
    except Exception as e:
        print(f"⚠️ Optimization Warning: {e}")
        w_sharpe = np.array([1/n_assets] * n_assets)

    # --- ENGINE 2: INVERSE VOLATILITY (Risk Parity-ish) ---
    # Weight = 1 / Volatility (Lower vol gets higher weight)
    inv_vol_raw = 1.0 / (vol_individual + 1e-6) # Avoid div/0
    w_inv_vol = inv_vol_raw / inv_vol_raw.sum()
    w_inv_vol = w_inv_vol.values

    # --- ENGINE 3: EFFICIENT FRONTIER CURVE ---
    # We calculate the minimum volatility for a specific return target
    target_rets = np.linspace(mu.min(), mu.max(), 50)
    frontier_vol = []
    
    for tr in target_rets:
        # Cons: Sum=1 AND Ret=Target
        cons_f = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
                  {'type': 'eq', 'fun': lambda x: np.sum(mu * x) - tr})
        
        def port_vol(w):
            return np.sqrt(np.dot(w.T, np.dot(sigma, w)))
            
        res = sco.minimize(port_vol, init_guess, method='SLSQP', bounds=bounds, constraints=cons_f)
        if res.success:
            frontier_vol.append(res['fun'])
        else:
            frontier_vol.append(np.nan) # Skip failed optimizations

    # --- ENGINE 4: MONTE CARLO SIMULATION ---
    # Generate random portfolios to visualize the "Cloud" of possibilities
    sim_ret = []
    sim_vol = []
    sim_sharpe = []
    
    for _ in range(NUM_SIMULATIONS):
        w = np.random.random(n_assets)
        w /= np.sum(w)
        r = np.sum(mu * w)
        v = np.sqrt(np.dot(w.T, np.dot(sigma, w)))
        sim_ret.append(r)
        sim_vol.append(v)
        sim_sharpe.append((r - RF_RATE)/v)

    # --- RESULTS & VISUALIZATION ---
    df_opt = pd.DataFrame({
        'Return': mu,
        'Volatility': vol_individual,
        'Sharpe Wgt': w_sharpe,
        'InvVol Wgt': w_inv_vol
    }, index=survivors).sort_values(by='Sharpe Wgt', ascending=False)

    print("\n📊 FINAL ALLOCATION TABLE")
    print("-" * 88)
    print(f"{'TICKER':<10} | {'RET (Ann)':<10} | {'VOL (Ann)':<10} | {'MAX SHARPE %':<12} | {'INV VOL %':<12}")
    print("-" * 88)
    for t, row in df_opt.iterrows():
        flag = "★" if row['Sharpe Wgt'] > 0.15 else ""
        print(f"{t:<10} | {row['Return']:<10.1%} | {row['Volatility']:<10.1%} | {row['Sharpe Wgt']:<12.1%} {flag:<2}| {row['InvVol Wgt']:<12.1%}")
    print("-" * 88)

    # VISUALIZATION
    fig = plt.figure(figsize=(14, 7))
    gs = fig.add_gridspec(1, 2, width_ratios=[1, 1.3])

    # CHART 1: Weights Comparison
    ax1 = fig.add_subplot(gs[0])
    indices = np.arange(n_assets)
    width = 0.35
    ax1.bar(indices - width/2, df_opt['Sharpe Wgt'], width, label='Max Sharpe', color='#2e7d32', edgecolor='black', alpha=0.9)
    ax1.bar(indices + width/2, df_opt['InvVol Wgt'], width, label='Inv Volatility', color='#1976d2', edgecolor='black', alpha=0.8)
    ax1.set_ylabel("Allocation %")
    ax1.set_title("Optimal Allocations: Aggressive vs Defensive", fontweight='bold')
    ax1.set_xticks(indices)
    ax1.set_xticklabels(df_opt.index, rotation=45)
    ax1.legend()
    ax1.grid(axis='y', alpha=0.15)

    # CHART 2: Efficient Frontier
    ax2 = fig.add_subplot(gs[1])
    
    # A. The Cloud
    sc = ax2.scatter(sim_vol, sim_ret, c=sim_sharpe, cmap='viridis', s=15, alpha=0.4, label='Random Portfolios')
    plt.colorbar(sc, ax=ax2, label='Sharpe Ratio')
    
    # B. The Frontier Line
    # Filter out NaNs for plotting
    clean_vol = [v for v in frontier_vol if not np.isnan(v)]
    clean_ret = [r for r, v in zip(target_rets, frontier_vol) if not np.isnan(v)]
    ax2.plot(clean_vol, clean_ret, 'k--', linewidth=2, label='Efficient Frontier')
    
    # C. The Optimized Points
    def get_port_stats(w):
        ret = np.sum(mu * w)
        vol = np.sqrt(np.dot(w.T, np.dot(sigma, w)))
        return ret, vol
    
    ret_s, vol_s = get_port_stats(w_sharpe)
    ret_i, vol_i = get_port_stats(w_inv_vol)
    
    ax2.scatter(vol_s, ret_s, c='gold', s=250, marker='*', edgecolors='black', label='Max Sharpe (Optimal)', zorder=10)
    ax2.scatter(vol_i, ret_i, c='cyan', s=150, marker='D', edgecolors='black', label='Inverse Vol (Stable)', zorder=10)
    
    # D. Assets
    ax2.scatter(df_opt['Volatility'], df_opt['Return'], c='white', edgecolors='black', s=50, label='Individual Assets', zorder=5)
    for t, row in df_opt.iterrows():
        ax2.text(row['Volatility'], row['Return'], f"  {t}", fontsize=8, va='center')

    ax2.set_xlabel("Volatility (Risk)")
    ax2.set_ylabel("Annualized Return")
    ax2.set_title(f"Efficient Frontier (n={NUM_SIMULATIONS})", fontweight='bold')
    ax2.grid(True, alpha=0.2)
    ax2.legend(loc='upper left', fontsize=9, framealpha=0.9)

    plt.tight_layout()
    plt.show()

    # --- METRICS & CONCENTRATION ---
    def calc_concentration(w):
        return np.sum(sorted(w, reverse=True)[:3])

    conc_s = calc_concentration(w_sharpe)
    conc_i = calc_concentration(w_inv_vol)

    print("\n🔮 THEORETICAL PORTFOLIO PERFORMANCE:")
    print("-" * 95)
    print(f"   STRATEGY    | EXP RETURN | EXP VOL   | SHARPE | TOP 3 CONC (Risk)")
    print("-" * 95)
    print(f"   Max Sharpe  | {ret_s:<10.1%} | {vol_s:<9.1%} | {(ret_s-RF_RATE)/vol_s:<6.2f} | {conc_s:<10.1%} {'(Aggressive)' if conc_s > 0.5 else '(Diversified)'}")
    print(f"   Inverse Vol | {ret_i:<10.1%} | {vol_i:<9.1%} | {(ret_i-RF_RATE)/vol_i:<6.2f} | {conc_i:<10.1%} {'(Aggressive)' if conc_i > 0.5 else '(Diversified)'}")
    print("-" * 95)

else:
    print("❌ No survivors found to optimize.")
================================================================================
            PORTFOLIO OPTIMIZATION: MAX SHARPE vs INVERSE VOLATILITY            
================================================================================
📥 Input Source: Block 4 (Refined Longs) (10 Tickers)
💎 Optimization Universe: INTC, CVS, NEM, HII, ALB, IVZ, DG, STX, BK, GS

📊 FINAL ALLOCATION TABLE
----------------------------------------------------------------------------------------
TICKER     | RET (Ann)  | VOL (Ann)  | MAX SHARPE % | INV VOL %   
----------------------------------------------------------------------------------------
DG         | 72.7%      | 37.0%      | 28.3%        ★ | 10.4%       
NEM        | 103.6%     | 41.1%      | 22.2%        ★ | 9.4%        
STX        | 134.0%     | 56.3%      | 17.7%        ★ | 6.9%        
HII        | 77.5%      | 38.3%      | 16.4%        ★ | 10.1%       
INTC       | 105.2%     | 65.1%      | 8.6%           | 5.9%        
CVS        | 35.1%      | 30.7%      | 6.8%           | 12.6%       
GS         | 39.2%      | 31.8%      | 0.0%           | 12.1%       
BK         | 32.0%      | 23.5%      | 0.0%           | 16.5%       
IVZ        | 46.1%      | 39.4%      | 0.0%           | 9.8%        
ALB        | 87.0%      | 61.2%      | 0.0%           | 6.3%        
----------------------------------------------------------------------------------------
No description has been provided for this image
🔮 THEORETICAL PORTFOLIO PERFORMANCE:
-----------------------------------------------------------------------------------------------
   STRATEGY    | EXP RETURN | EXP VOL   | SHARPE | TOP 3 CONC (Risk)
-----------------------------------------------------------------------------------------------
   Max Sharpe  | 91.4%      | 24.3%     | 3.76   | 68.2%      (Aggressive)
   Inverse Vol | 65.0%      | 22.0%     | 2.96   | 41.2%      (Diversified)
-----------------------------------------------------------------------------------------------

CREATING 3 PORTFOLIOS ALLOWING SHORTSELLING:¶

WEIGHTS ARE CALCULATED --> MAX SHARPE OPTIMISATION - INVERSE VOLATILITY - EQUAL WEIGHTS

In [14]:
# --- BLOCK 12: OPTIMIZED LONG/SHORT FORECAST (LINKED TO BLOCK 4) ---
print("\n" + "="*95)
print(f"{'PREDICTIVE ENGINE: OFFICIAL TRADING SIGNALS (FROM BLOCK 4 CANDIDATES)':^95}")
print("="*95)

# --- CONFIGURATION ---
LOOKBACK_OPT    = 252    # 12 Months Data for Optimization
RF_RATE         = last_rf   # last Risk Free Rate
TARGET_POS      = 10     # Max 10 Longs / 10 Shorts

# 1. RETRIEVE CANDIDATES FROM BLOCK 4
if 'final_longs' not in locals() or 'final_shorts' not in locals():
    print("❌ Critical Error: 'final_longs' or 'final_shorts' missing.")
    print("   Please run Block 4 first to generate your candidate lists.")
else:
    print(f"📥 Input: Using candidates directly from Block 4.")
    
    # 2. FILTER & SORT (Ensuring we have the best of the Block 4 list)
    # We calculate the latest momentum just to sort them by strength
    latest_mom = (prices_stocks.iloc[-1] / prices_stocks.iloc[-252]) - 1
    
    # Filter: Ensure Block 4 candidates exist in our price data
    valid_longs = [t for t in final_longs if t in latest_mom.index]
    valid_shorts = [t for t in final_shorts if t in latest_mom.index]
    
    # Select Top N (Best Longs, Worst Shorts) based on current momentum
    # This handles cases where Block 4 might have given you 15 candidates but you only want 10
    top_longs = latest_mom[valid_longs].nlargest(TARGET_POS).index.tolist()
    top_shorts = latest_mom[valid_shorts].nsmallest(TARGET_POS).index.tolist()
    
    print(f"💎 FINAL SELECTION:")
    print(f"   • Longs ({len(top_longs)}):  {', '.join(top_longs)}")
    print(f"   • Shorts ({len(top_shorts)}): {', '.join(top_shorts)}")

    # 3. OPTIMIZATION ENGINE
    def run_optimizer_leg(tickers, side="LONG"):
        if not tickers: 
            return pd.Series(dtype=float), pd.Series(dtype=float), pd.Series(dtype=float)
        
        # Get Data
        subset = prices_stocks[tickers].iloc[-LOOKBACK_OPT:]
        rets = subset.pct_change(fill_method=None).fillna(0)
        
        # Invert returns for Shorts (Optimize the 'drop')
        if side == "SHORT": rets = rets * -1
            
        mu = rets.mean() * 252
        sigma = rets.cov() * 252
        vol = rets.std() * np.sqrt(252)
        n = len(tickers)
        
        # A. Max Sharpe
        def neg_sharpe(w):
            p_ret = np.sum(mu * w)
            p_vol = np.sqrt(np.dot(w.T, np.dot(sigma, w)))
            if p_vol == 0: return 0
            return - (p_ret - RF_RATE) / p_vol

        try:
            cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
            bounds = tuple((0.0, 1.0) for _ in range(n))
            init_guess = [1/n] * n
            res = sco.minimize(neg_sharpe, init_guess, method='SLSQP', bounds=bounds, constraints=cons)
            w_agg = pd.Series(res.x, index=tickers)
        except:
            w_agg = pd.Series(1/n, index=tickers)
            
        # B. Inverse Volatility (Safe)
        inv_vol_raw = 1.0 / (vol + 1e-6)
        w_safe = inv_vol_raw / inv_vol_raw.sum()
        
        # C. Equal Weight
        w_equal = pd.Series(1.0/n, index=tickers)
        
        return w_agg, w_safe, w_equal

    # 4. EXECUTE
    print("\n... optimizing allocations ...")
    w_la, w_ls, w_le = run_optimizer_leg(top_longs, "LONG")
    w_sa, w_ss, w_se = run_optimizer_leg(top_shorts, "SHORT")

    # 5. GENERATE REPORT
    print("\n" + "="*95)
    print(f"{'🏆 OFFICIAL TRADING SIGNALS (NEXT MONTH) 🏆':^95}")
    print("="*95)
    
    df_long = pd.DataFrame({'Side': 'LONG', 'Mom': latest_mom[top_longs], 'Aggressive': w_la, 'Safe': w_ls, 'Equal': w_le})
    df_short = pd.DataFrame({'Side': 'SHORT', 'Mom': latest_mom[top_shorts], 'Aggressive': w_sa, 'Safe': w_ss, 'Equal': w_se})
    
    # Sort by Aggressive Weight
    df_long = df_long.sort_values(by='Aggressive', ascending=False)
    df_short = df_short.sort_values(by='Aggressive', ascending=False)
    
    full_df = pd.concat([df_long, df_short])

    print(f"{'TICKER':<8} | {'SIDE':<5} | {'MOMENTUM':<8} | {'AGGRESSIVE':<12} | {'SAFE (VOL)':<12} | {'EQUAL':<10} | {'ACTION'}")
    print("-" * 95)
    
    for t, row in full_df.iterrows():
        act = "BUY" if row['Side'] == 'LONG' else "SHORT"
        high_conviction = "★" if row['Aggressive'] > 0.15 else ""
        print(f"{t:<8} | {row['Side']:<5} | {row['Mom']:<8.1%} | {row['Aggressive']:<12.1%} {high_conviction:<1}| {row['Safe']:<12.1%} | {row['Equal']:<10.1%} | {act}")

    print("-" * 95)
    print("NOTE: Aggressive = Max Sharpe Ratio | Safe = Inverse Volatility | Equal = 1/N")

    # 6. VISUALIZATION
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 7))
    
    def plot_allocations(ax, df, title, main_col):
        y_pos = np.arange(len(df))
        h = 0.25
        ax.barh(y_pos + h, df['Aggressive'], height=h, label='Aggressive', color='#00897b', edgecolor='black', alpha=0.9)
        ax.barh(y_pos,     df['Safe'],       height=h, label='Safe',       color='#ffb300', edgecolor='black', alpha=0.9)
        ax.barh(y_pos - h, df['Equal'],      height=h, label='Equal',      color='#3949ab', edgecolor='black', alpha=0.8)
        ax.set_yticks(y_pos)
        ax.set_yticklabels(df.index, fontsize=10, fontweight='bold')
        ax.invert_yaxis()
        ax.set_xlabel("Allocation %")
        ax.set_title(title, fontsize=12, fontweight='bold', color=main_col)
        ax.legend(loc='lower right', fontsize=9)
        ax.grid(axis='x', linestyle='--', alpha=0.3)

    if not df_long.empty: plot_allocations(ax1, df_long, "Long Allocation", '#2e7d32')
    if not df_short.empty: plot_allocations(ax2, df_short, "Short Allocation", '#c62828')

    plt.tight_layout()
    plt.show()

    print("✅ FORECAST GENERATED.")
===============================================================================================
             PREDICTIVE ENGINE: OFFICIAL TRADING SIGNALS (FROM BLOCK 4 CANDIDATES)             
===============================================================================================
📥 Input: Using candidates directly from Block 4.
💎 FINAL SELECTION:
   • Longs (10):  STX, NEM, INTC, HII, ALB, DG, IVZ, GS, CVS, BK
   • Shorts (10): TTD, IT, LULU, GDDY, DECK, DVA, FDS, NOW, CDW, HPQ

... optimizing allocations ...

===============================================================================================
                           🏆 OFFICIAL TRADING SIGNALS (NEXT MONTH) 🏆                           
===============================================================================================
TICKER   | SIDE  | MOMENTUM | AGGRESSIVE   | SAFE (VOL)   | EQUAL      | ACTION
-----------------------------------------------------------------------------------------------
DG       | LONG  | 93.4%    | 28.3%        ★| 10.4%        | 10.0%      | BUY
NEM      | LONG  | 158.5%   | 22.2%        ★| 9.4%         | 10.0%      | BUY
STX      | LONG  | 225.8%   | 17.7%        ★| 6.9%         | 10.0%      | BUY
HII      | LONG  | 101.3%   | 16.4%        ★| 10.1%        | 10.0%      | BUY
INTC     | LONG  | 133.2%   | 8.6%          | 5.9%         | 10.0%      | BUY
CVS      | LONG  | 35.6%    | 6.8%          | 12.6%        | 10.0%      | BUY
GS       | LONG  | 40.6%    | 0.0%          | 12.1%        | 10.0%      | BUY
IVZ      | LONG  | 46.8%    | 0.0%          | 9.8%         | 10.0%      | BUY
ALB      | LONG  | 97.8%    | 0.0%          | 6.3%         | 10.0%      | BUY
BK       | LONG  | 33.9%    | 0.0%          | 16.5%        | 10.0%      | BUY
GDDY     | SHORT | -54.4%   | 25.1%        ★| 11.8%        | 10.0%      | SHORT
FDS      | SHORT | -44.4%   | 24.1%        ★| 13.4%        | 10.0%      | SHORT
DVA      | SHORT | -45.7%   | 20.9%        ★| 12.6%        | 10.0%      | SHORT
IT       | SHORT | -62.4%   | 20.4%        ★| 9.2%         | 10.0%      | SHORT
LULU     | SHORT | -58.0%   | 8.0%          | 7.9%         | 10.0%      | SHORT
TTD      | SHORT | -72.1%   | 1.6%          | 5.2%         | 10.0%      | SHORT
DECK     | SHORT | -46.4%   | 0.0%          | 7.6%         | 10.0%      | SHORT
NOW      | SHORT | -42.5%   | 0.0%          | 9.9%         | 10.0%      | SHORT
CDW      | SHORT | -41.1%   | 0.0%          | 12.0%        | 10.0%      | SHORT
HPQ      | SHORT | -40.2%   | 0.0%          | 10.4%        | 10.0%      | SHORT
-----------------------------------------------------------------------------------------------
NOTE: Aggressive = Max Sharpe Ratio | Safe = Inverse Volatility | Equal = 1/N
No description has been provided for this image
✅ FORECAST GENERATED.
In [15]:
# --- BLOCK 13: FORWARD-LOOKING RISK FORECAST (MONTE CARLO) ---
print("\n" + "="*80)
print(f"{'RISK FORECAST: NEXT MONTH SIMULATION (21 DAYS)':^80}")
print("="*80)

# 1. SETUP & DATA COLLECTION
# These variables come directly from Block 12
required_vars = ['w_la', 'w_ls', 'w_le', 'w_sa', 'w_ss', 'w_se', 'top_longs', 'top_shorts', 'prices_stocks']

if not all(var in globals() for var in required_vars):
    print("❌ Error: Weights or lists not found. Please run Block 12 first.")
else:
    # Combine Long and Short weights into a single Net Portfolio
    # Assumption: 50% Capital Allocated to Longs, 50% to Shorts (Dollar Neutral start)
    capital_allocation = 0.50
    
    # Helper to build Net Weight Series
    def get_net_portfolio(w_long, w_short):
        # Longs are positive, Shorts are negative
        # w_short comes in as positive weights (sum=1), so we multiply by -1
        # .add ensures we align tickers correctly if there's overlap (rare but possible)
        net = (w_long * capital_allocation).add(w_short * -capital_allocation, fill_value=0)
        return net

    port_agg = get_net_portfolio(w_la, w_sa) # Aggressive (Teal)
    port_saf = get_net_portfolio(w_ls, w_ss) # Safe (Amber)
    port_eq  = get_net_portfolio(w_le, w_se) # Equal (Indigo)

    # 2. CALCULATE PORTFOLIO METRICS (Ex-Ante)
    # We need the covariance matrix of ALL stocks involved (Longs + Shorts)
    all_tickers = list(set(top_longs + top_shorts))
    
    # Use last 6 months (126 days) for sensitive correlation estimate
    subset = prices_stocks[all_tickers].iloc[-126:] 
    cov_matrix = subset.pct_change(fill_method=None).fillna(0).cov() * 252
    mu_vector  = subset.pct_change(fill_method=None).fillna(0).mean() * 252

    def calc_stats(weights, name):
        # Align weights with covariance matrix
        w = weights.reindex(cov_matrix.columns).fillna(0)
        
        # Portfolio Variance = w^T * Cov * w
        var = np.dot(w.T, np.dot(cov_matrix, w))
        vol = np.sqrt(var)
        
        # Expected Return = Sum(w * mu)
        ret = np.dot(w, mu_vector.reindex(w.index).fillna(0))
        
        return ret, vol

    stats_agg = calc_stats(port_agg, "Aggressive")
    stats_saf = calc_stats(port_saf, "Safe")
    stats_eq  = calc_stats(port_eq,  "Equal")

    # 3. REPORT: RISK PROFILE
    print(f"\n📊 EXPECTED RISK PROFILE (ANNUALIZED)")
    print("-" * 80)
    print(f"{'STRATEGY':<15} | {'EXP RETURN':<12} | {'VOLATILITY':<12} | {'SHARPE':<8} | {'VaR (95%)':<10}")
    print("-" * 80)

    stats_list = [
        ("Aggressive", stats_agg, "#00897b"), # Teal
        ("Safe",       stats_saf, "#ffb300"), # Amber
        ("Equal Wgt",  stats_eq,  "#3949ab")  # Indigo
    ]

    for name, (r, v), color in stats_list:
        sharpe = (r - 0.04) / v if v > 0 else 0
        # Value at Risk (Monthly 95%): 1.65 std devs * Monthly Vol
        monthly_vol = v / np.sqrt(12)
        var_95 = -1.65 * monthly_vol
        
        print(f"{name:<15} | {r:<12.1%} | {v:<12.1%} | {sharpe:<8.2f} | {var_95:<10.1%}")
    print("-" * 80)
    print("Note: VaR (95%) = The maximum expected loss in a month with 95% confidence.")

    # 4. MONTE CARLO SIMULATION (The "Cone of Uncertainty")
    days = 21 # Next trading month
    simulations = 1000
    start_val = 10000

    plt.figure(figsize=(14, 6))

    def run_simulation(mu, sigma, name, color, subplot_idx):
        dt = 1/252
        # Random paths: exp((mu - 0.5*sig^2)*dt + sig*sqrt(dt)*Z)
        paths = np.zeros((days, simulations))
        paths[0] = start_val
        
        for t in range(1, days):
            z = np.random.standard_normal(simulations)
            # Geometric Brownian Motion formula
            drift = (mu - 0.5 * sigma**2) * dt
            shock = sigma * np.sqrt(dt) * z
            paths[t] = paths[t-1] * np.exp(drift + shock)
            
        # Plotting
        ax = plt.subplot(1, 3, subplot_idx)
        
        # The "Cloud"
        ax.plot(paths, color=color, alpha=0.05) 
        
        # The Median
        ax.plot(np.median(paths, axis=1), color='black', linewidth=2, linestyle='--', label='Median') 
        
        # Percentiles (The Cone)
        p5 = np.percentile(paths, 5, axis=1)
        p95 = np.percentile(paths, 95, axis=1)
        ax.fill_between(range(days), p5, p95, color=color, alpha=0.2, label='90% Conf.')
        
        # Stats
        final_vals = paths[-1]
        win_rate = np.sum(final_vals > start_val) / simulations
        
        ax.set_title(f"{name}\nWin Rate: {win_rate:.0%}", fontweight='bold')
        ax.set_xlabel("Trading Days")
        if subplot_idx == 1: ax.set_ylabel("Portfolio Value (£)")
        ax.grid(True, alpha=0.2)
        ax.legend(loc='upper left')

    # Run for all 3 using consistent colors
    run_simulation(stats_agg[0], stats_agg[1], "Aggressive (Sharpe)", "#00897b", 1)
    run_simulation(stats_saf[0], stats_saf[1], "Safe (Inv Vol)",      "#ffb300", 2)
    run_simulation(stats_eq[0],  stats_eq[1],  "Equal Weight",        "#3949ab", 3)

    plt.tight_layout()
    plt.show()

    print("\n✅ SIMULATION COMPLETE.")
    print("   • The charts above show 1,000 possible outcomes for the next month.")
    print("   • 'Win Rate' = Probability of ending the month with a profit.")
================================================================================
                 RISK FORECAST: NEXT MONTH SIMULATION (21 DAYS)                 
================================================================================

📊 EXPECTED RISK PROFILE (ANNUALIZED)
--------------------------------------------------------------------------------
STRATEGY        | EXP RETURN   | VOLATILITY   | SHARPE   | VaR (95%) 
--------------------------------------------------------------------------------
Aggressive      | 95.0%        | 16.1%        | 5.65     | -7.7%     
Safe            | 78.7%        | 11.3%        | 6.61     | -5.4%     
Equal Wgt       | 87.8%        | 13.0%        | 6.46     | -6.2%     
--------------------------------------------------------------------------------
Note: VaR (95%) = The maximum expected loss in a month with 95% confidence.
No description has been provided for this image
✅ SIMULATION COMPLETE.
   • The charts above show 1,000 possible outcomes for the next month.
   • 'Win Rate' = Probability of ending the month with a profit.

VERIFICATION: DID THE PREVIOUS MONTH'S FORECAST COME TRUE?¶

Sets up the "Time Machine": It rewinds the clock by 21 trading days (approximately one month), establishing a "Hypothetical Decision Date." It then defines a "Test Period" from that decision date to the present (the "Unknown Future").

Runs a Blind Forecast: It strictly slices the data (prices_stocks) to include only history up to the decision date. It then calculates the 12-month momentum, applies a 200-day Moving Average trend filter (just like the live strategy), and selects the top 10 stocks. This simulates exactly what the algorithm would have picked back then without knowing the future.

Optimizes the Blind Portfolio: It runs the "Max Sharpe" optimization on those blind picks using the limited history, determining the optimal weights.

Verifies Performance (The Reveal): It then fast-forwards to the present and calculates how that specific portfolio actually performed over the last 21 days (future_prices). It aligns the portfolio returns with the real market data.

Benchmarks Results: It compares the strategy's performance against the S&P 500 (converted to GBP) over the same period.

Generates a Report Card: It prints a clear summary of the Strategy Return vs. Benchmark Return, the Alpha (Excess Return), and a detailed breakdown of each ticker's predicted weight vs. its actual return.

Visualizes the Result: It plots an equity curve showing the strategy's performance against the benchmark during this out-of-sample test period.

In [16]:
# --- BLOCK 14: FORECAST VERIFICATION (OUT-OF-SAMPLE TEST) ---
print("\n" + "="*80)
print(f"{'VERIFICATION: DID THE PREVIOUS MONTH\'S FORECAST COME TRUE?':^80}")
print("="*80)

# 1. SETUP THE "TIME MACHINE"
if 'prices_stocks' in locals() and len(prices_stocks) > 300:
    today_real = prices_stocks.index[-1]
    
    # Go back 21 trading days (approx 1 month)
    # This is our "Hypothetical Decision Date"
    decision_date_idx = -21
    decision_date = prices_stocks.index[decision_date_idx]
    
    # The "Test Period" is from Decision Date -> Today
    test_start = decision_date
    test_end   = today_real
    
    # PARAMETERS (Must match Block 12)
    LOOKBACK_TREND = 252   # 12 Months
    LOOKBACK_OPT   = 126   # 6 Months
    TARGET_POS     = 10    # Top 10 Stocks
    # Fallback for RF_RATE if not defined globally
    RF_RATE = globals().get('last_rf', 0.04)

    print(f"⚙️ SIMULATION PARAMETERS:")
    print(f"   • Hypothetical Decision Date: {decision_date.strftime('%Y-%m-%d')}")
    print(f"   • Blind Trend Window:         {prices_stocks.index[decision_date_idx - LOOKBACK_TREND].strftime('%Y-%m-%d')} -> {decision_date.strftime('%Y-%m-%d')}")
    print(f"   • Verification Period:        {decision_date.strftime('%Y-%m-%d')} -> {today_real.strftime('%Y-%m-%d')} (The 'Hidden' Month)")

    # 2. RUN THE FORECAST (AS OF THE PAST)
    # We slice data to ONLY see what was available back then
    history_blind = prices_stocks.loc[:decision_date]
    
    # A. Momentum Scan (Blind)
    # Calculate 12-Month Return using only past data
    # Safe check: ensure lookback exists
    if len(history_blind) > LOOKBACK_TREND:
        mom_blind = (history_blind.iloc[-1] / history_blind.iloc[-LOOKBACK_TREND]) - 1
    else:
        mom_blind = (history_blind.iloc[-1] / history_blind.iloc[0]) - 1

    # B. Filter (Blind)
    # We verify the trend was positive BACK THEN (Simple MA Filter)
    ma_200 = history_blind.iloc[-200:].mean()
    trend_ok = history_blind.iloc[-1] > ma_200
    
    # Only pick stocks that were trending UP back then
    valid_universe = mom_blind[trend_ok].dropna().sort_values(ascending=False)
    
    if valid_universe.empty:
        print("⚠️ No stocks passed the trend filter in the past period.")
        top_picks_blind = []
    else:
        top_picks_blind = valid_universe.head(TARGET_POS).index.tolist()
        print(f"\n🔎 PAST PREDICTIONS (Made on {decision_date.date()}):")
        if top_picks_blind:
            print(f"   • Top Blind Pick: {top_picks_blind[0]} (Trend: {valid_universe.iloc[0]:.1%})")

    # C. Optimization (Blind - Max Sharpe)
    def optimize_blind_aggressive(tickers):
        if not tickers: return pd.Series(dtype=float)
        
        # Slice data strictly to the past
        p_sub = history_blind[tickers].iloc[-LOOKBACK_OPT:]
        rets = p_sub.pct_change(fill_method=None).fillna(0)
        
        mu = rets.mean() * 252
        sigma = rets.cov() * 252
        n = len(tickers)
        
        def neg_sharpe(w):
            p_ret = np.sum(mu * w)
            p_vol = np.sqrt(np.dot(w.T, np.dot(sigma, w)))
            if p_vol == 0: return 0
            return - (p_ret - RF_RATE) / p_vol
            
        cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
        bnds = tuple((0.0, 1.0) for _ in range(n))
        
        try:
            init_guess = [1/n] * n
            res = sco.minimize(neg_sharpe, init_guess, method='SLSQP', bounds=bnds, constraints=cons)
            return pd.Series(res.x, index=tickers)
        except:
            return pd.Series(1/n, index=tickers)

    if top_picks_blind:
        print("... Optimizing Blind Portfolio (Max Sharpe) ...")
        w_verify = optimize_blind_aggressive(top_picks_blind)
        w_verify = w_verify[w_verify > 0.001] # Filter noise

        # 3. VERIFY PERFORMANCE (THE REVEAL)
        # Now we check how these specific weights performed in the "Unknown Future" (Last 21 days)
        future_prices = prices_stocks.loc[test_start:test_end]
        future_rets = future_prices.pct_change(fill_method=None).fillna(0)
        
        # Portfolio Return
        valid_assets = [t for t in w_verify.index if t in future_rets.columns]
        
        if not valid_assets:
            print("❌ Error: No valid assets found for verification.")
        else:
            # Align weights and calculate daily returns
            aligned_w = w_verify[valid_assets].reindex(future_rets.columns).fillna(0)
            port_daily_ret = future_rets.dot(aligned_w)
            
            # Grow a hypothetical £10,000 account
            port_curve = 10000 * (1 + port_daily_ret).cumprod()
            
            # 4. BENCHMARK (S&P 500 in GBP)
            # Using Robust Logic from Block 10
            try:
                bench_df = yf.download(["^GSPC", "GBPUSD=X"], start=test_start, end=test_end, progress=False, auto_adjust=True)
                
                if isinstance(bench_df.columns, pd.MultiIndex):
                    bench_df = bench_df.xs('Close', axis=1, level=0)
                elif 'Close' in bench_df.columns:
                    bench_df = bench_df['Close']
                
                bench_df = bench_df.ffill().bfill()
                spx_gbp = bench_df["^GSPC"] / bench_df["GBPUSD=X"]
                
                # Reindex and Normalize
                spx_aligned = spx_gbp.reindex(future_prices.index).ffill()
                bench_curve = 10000 * (spx_aligned / spx_aligned.iloc[0])
                bench_total = (bench_curve.iloc[-1] / 10000) - 1
            except Exception as e:
                print(f"⚠️ Benchmark failed ({e}). Using Flat Line.")
                bench_curve = pd.Series(10000, index=future_prices.index)
                bench_total = 0.0

            # 5. REPORT CARD
            total_ret = (port_curve.iloc[-1] / 10000) - 1
            
            print("\n" + "="*60)
            print(f"{'VERIFICATION RESULTS':^60}")
            print("="*60)
            print(f"Strategy Return (Last 21 Days): {total_ret:+.2%} ({'✅ PROFIT' if total_ret > 0 else '❌ LOSS'})")
            print(f"Benchmark Return (S&P 500):     {bench_total:+.2%}")
            print(f"Alpha (Excess Return):          {total_ret - bench_total:+.2%}")
            print("-" * 60)
            
            # Detailed Breakdown
            print(f"{'TICKER':<10} | {'PREDICTED WEIGHT':<18} | {'ACTUAL RETURN'}")
            print("-" * 60)
            
            if not future_prices.empty:
                asset_total_rets = (future_prices.iloc[-1] / future_prices.iloc[0]) - 1
                sorted_w = w_verify.sort_values(ascending=False)
                for t, w in sorted_w.items():
                    actual_perf = asset_total_rets.get(t, 0.0)
                    marker = "✅" if actual_perf > 0 else "🔻"
                    print(f"{t:<10} | {w:.2%}             | {actual_perf:+.2%} {marker}")
            
            print("-" * 60)

            # 6. VISUALIZATION
            plt.figure(figsize=(10, 5))
            
            plt.plot(port_curve, color='#00c853', linewidth=2, label='Strategy (Blind Forecast)')
            plt.plot(bench_curve, color='black', linestyle='--', alpha=0.6, label='Benchmark (GBP)')
            
            plt.title(f"Truth Test: Out-of-Sample Performance\n({test_start.date()} to {test_end.date()})")
            plt.ylabel("Value (£)")
            plt.legend()
            plt.grid(True, alpha=0.3)
            plt.tight_layout()
            plt.show()
    else:
        print("❌ Blind selection failed. No stocks passed the filter.")

else:
    print("⚠️ Not enough data history to run verification (Need at least 300 days).")
================================================================================
           VERIFICATION: DID THE PREVIOUS MONTH'S FORECAST COME TRUE?           
================================================================================
⚙️ SIMULATION PARAMETERS:
   • Hypothetical Decision Date: 2025-12-24
   • Blind Trend Window:         2025-01-06 -> 2025-12-24
   • Verification Period:        2025-12-24 -> 2026-01-21 (The 'Hidden' Month)

🔎 PAST PREDICTIONS (Made on 2025-12-24):
   • Top Blind Pick: WDC (Trend: 240.3%)
... Optimizing Blind Portfolio (Max Sharpe) ...

============================================================
                    VERIFICATION RESULTS                    
============================================================
Strategy Return (Last 21 Days): +6.81% (✅ PROFIT)
Benchmark Return (S&P 500):     -1.25%
Alpha (Excess Return):          +8.07%
------------------------------------------------------------
TICKER     | PREDICTED WEIGHT   | ACTUAL RETURN
------------------------------------------------------------
NEM        | 29.94%             | +14.37% ✅
WBD        | 26.80%             | -2.70% 🔻
WDC        | 20.56%             | +25.06% ✅
APP        | 15.53%             | -21.71% 🔻
MU         | 7.16%             | +28.28% ✅
------------------------------------------------------------
No description has been provided for this image
In [17]:
# --- BLOCK 14: FORECAST VERIFICATION (FULL 7-WAY TEST) ---
print("\n" + "="*95)
print(f"{'VERIFICATION: AUDITING ALL 6 STRATEGY VARIANTS (LAST 21 DAYS)':^95}")
print("="*95)

# 1. TIME MACHINE SETUP
if 'prices_stocks' in locals() and len(prices_stocks) > 300:
    today_real = prices_stocks.index[-1]
    
    # "Rewind" 21 trading days (approx 1 month)
    decision_date_idx = -21
    decision_date = prices_stocks.index[decision_date_idx]
    
    test_start = decision_date
    test_end   = today_real
    
    # CONFIGURATION (Matching Block 12)
    LOOKBACK_TREND = 252
    LOOKBACK_OPT   = 126
    TARGET_POS     = 10
    # Use global RF_RATE or default
    RF_RATE = globals().get('last_rf', 0.04)

    print(f"⚙️ SIMULATION PARAMETERS:")
    print(f"   • Decision Date (Past):       {decision_date.strftime('%Y-%m-%d')}")
    print(f"   • Verification Period:        {decision_date.strftime('%Y-%m-%d')} -> {today_real.strftime('%Y-%m-%d')}")

    # 2. BLIND EXECUTION (RE-RUNNING LOGIC IN THE PAST)
    history_blind = prices_stocks.loc[:decision_date]
    
    # A. Momentum Selection (Blind)
    # Calculate 12-Month Return
    if len(history_blind) > LOOKBACK_TREND:
        mom_blind = (history_blind.iloc[-1] / history_blind.iloc[-LOOKBACK_TREND]) - 1
    else:
        mom_blind = (history_blind.iloc[-1] / history_blind.iloc[0]) - 1
    
    # Simple Trend Filter (Price > 200 SMA)
    ma_200 = history_blind.iloc[-200:].mean()
    trend_ok = history_blind.iloc[-1] > ma_200
    
    # Identify Candidates
    valid_universe = mom_blind[trend_ok].dropna().sort_values(ascending=False)
    
    if valid_universe.empty:
        print("⚠️ No stocks passed the trend filter in the past period.")
        blind_longs = []
        blind_shorts = []
    else:
        # Top Longs (Best Momentum)
        blind_longs = valid_universe.head(TARGET_POS).index.tolist()
        
        # Top Shorts (Worst Momentum - Inverse Logic)
        # Note: For shorts, we look for weakest momentum in the *entire* pool, ignoring trend filter
        mom_all = mom_blind.dropna().sort_values(ascending=True)
        blind_shorts = mom_all.head(TARGET_POS).index.tolist()

        print(f"\n🔎 PAST PICKS ({decision_date.date()}):")
        if blind_longs: print(f"   • Top Long:  {blind_longs[0]}")
        if blind_shorts: print(f"   • Top Short: {blind_shorts[0]}")

    # B. Optimization Engine (Blind)
    def get_blind_weights(tickers):
        if not tickers: return pd.Series(dtype=float), pd.Series(dtype=float), pd.Series(dtype=float)
        
        # Get Past Data
        subset = history_blind[tickers].iloc[-LOOKBACK_OPT:]
        rets = subset.pct_change(fill_method=None).fillna(0)
        
        mu = rets.mean() * 252
        sigma = rets.cov() * 252
        vol = rets.std() * np.sqrt(252)
        n = len(tickers)
        
        # 1. Max Sharpe (Aggressive)
        def neg_sharpe(w):
            p_ret = np.sum(mu * w)
            p_vol = np.sqrt(np.dot(w.T, np.dot(sigma, w)))
            if p_vol == 0: return 0
            return - (p_ret - RF_RATE) / p_vol
            
        cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
        bnds = tuple((0.0, 1.0) for _ in range(n))
        
        try:
            res = sco.minimize(neg_sharpe, [1/n]*n, method='SLSQP', bounds=bnds, constraints=cons)
            w_agg = pd.Series(res.x, index=tickers)
        except:
            w_agg = pd.Series(1/n, index=tickers)
            
        # 2. Inverse Vol (Safe)
        inv_vol_raw = 1.0 / (vol + 1e-6)
        w_safe = inv_vol_raw / inv_vol_raw.sum()
        
        # 3. Equal Weight
        w_equal = pd.Series(1.0/n, index=tickers)
        
        return w_agg, w_safe, w_equal

    if blind_longs:
        print("... Calculating Blind Weights ...")
        # Generate Weights for Longs and Shorts independently
        wl_agg, wl_saf, wl_eq = get_blind_weights(blind_longs)
        ws_agg, ws_saf, ws_eq = get_blind_weights(blind_shorts)

        # 3. CONSTRUCT 6 PORTFOLIOS
        # Helper to combine Long/Short (50/50 Allocation)
        def combine_ls(w_l, w_s):
            # 50% Long, 50% Short (Dollar Neutral)
            return (w_l * 0.5).add(w_s * -0.5, fill_value=0)

        portfolios = {
            # Long Only (100% Long)
            "Long: Aggressive": wl_agg,
            "Long: Safe":       wl_saf,
            "Long: Equal":      wl_eq,
            
            # Long Short (50% L / 50% S)
            "L/S: Aggressive":  combine_ls(wl_agg, ws_agg),
            "L/S: Safe":        combine_ls(wl_saf, ws_saf),
            "L/S: Equal":       combine_ls(wl_eq, ws_eq)
        }

        # 4. VERIFY PERFORMANCE (THE REVEAL)
        future_prices = prices_stocks.loc[test_start:test_end]
        future_rets = future_prices.pct_change(fill_method=None).fillna(0)
        
        results = {}
        curves = {}
        
        for name, weights in portfolios.items():
            # Align
            valid_assets = [t for t in weights.index if t in future_rets.columns]
            w_aligned = weights[valid_assets].reindex(future_rets.columns).fillna(0)
            
            # Calc Return
            daily_ret = future_rets.dot(w_aligned)
            curve = 10000 * (1 + daily_ret).cumprod()
            total_ret = (curve.iloc[-1] / 10000) - 1
            
            results[name] = total_ret
            curves[name] = curve

        # 5. BENCHMARK (S&P 500 GBP)
        try:
            bench_df = yf.download(["^GSPC", "GBPUSD=X"], start=test_start, end=test_end, progress=False, auto_adjust=True)
            if isinstance(bench_df.columns, pd.MultiIndex): bench_df = bench_df.xs('Close', axis=1, level=0)
            elif 'Close' in bench_df.columns: bench_df = bench_df['Close']
            
            bench_df = bench_df.ffill().bfill()
            spx_gbp = bench_df["^GSPC"] / bench_df["GBPUSD=X"]
            spx_aligned = spx_gbp.reindex(future_prices.index).ffill()
            bench_curve = 10000 * (spx_aligned / spx_aligned.iloc[0])
            bench_ret = (bench_curve.iloc[-1] / 10000) - 1
        except:
            bench_curve = pd.Series(10000, index=future_prices.index)
            bench_ret = 0.0
        
        results["Benchmark"] = bench_ret
        curves["Benchmark"] = bench_curve

        # 6. REPORTING
        print("\n" + "="*80)
        print(f"{'VERIFICATION RESULTS':^80}")
        print("="*80)
        print(f"{'STRATEGY':<20} | {'RETURN':<10} | {'OUTCOME'}")
        print("-" * 80)
        
        # Sort results by return
        sorted_res = dict(sorted(results.items(), key=lambda item: item[1], reverse=True))
        
        for name, ret in sorted_res.items():
            outcome = "✅ BEAT MKT" if ret > bench_ret else "---"
            print(f"{name:<20} | {ret:<10.2%} | {outcome}")
        print("-" * 80)

        # 7. VISUALIZATION
        plt.figure(figsize=(12, 7))
        
        # Colors matching Block 12 logic
        colors = {
            "Long: Aggressive": '#00897b', "Long: Safe": '#ffb300', "Long: Equal": '#3949ab',
            "L/S: Aggressive":  '#004d40', "L/S: Safe":  '#ff6f00', "L/S: Equal":  '#1a237e',
            "Benchmark": 'black'
        }
        styles = {
            "Long: Aggressive": '-', "Long: Safe": '-', "Long: Equal": '-',
            "L/S: Aggressive": '--', "L/S: Safe": '--', "L/S: Equal": '--',
            "Benchmark": ':'
        }

        for name, curve in curves.items():
            plt.plot(curve, label=f"{name} ({results[name]:.1%})", 
                     color=colors.get(name, 'gray'), 
                     linestyle=styles.get(name, '-'), 
                     linewidth=2.5 if "Aggressive" in name or "Benchmark" in name else 1.5,
                     alpha=0.6 if "Benchmark" in name else 0.9)

        plt.title(f"Truth Test: 6-Way Strategy Auditing\n({test_start.date()} to {test_end.date()})", fontweight='bold')
        plt.ylabel("Portfolio Value (£)")
        plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
        plt.grid(True, alpha=0.2)
        plt.tight_layout()
        plt.show()
        
        winner = list(sorted_res.keys())[0]
        print(f"\n🏆 WINNER (Last Month): {winner}")
    else:
        print("⚠️ Blind selection failed. No stocks passed filters.")

else:
    print("⚠️ Not enough history for verification.")
===============================================================================================
                 VERIFICATION: AUDITING ALL 6 STRATEGY VARIANTS (LAST 21 DAYS)                 
===============================================================================================
⚙️ SIMULATION PARAMETERS:
   • Decision Date (Past):       2025-12-24
   • Verification Period:        2025-12-24 -> 2026-01-21

🔎 PAST PICKS (2025-12-24):
   • Top Long:  WDC
   • Top Short: TTD
... Calculating Blind Weights ...

================================================================================
                              VERIFICATION RESULTS                              
================================================================================
STRATEGY             | RETURN     | OUTCOME
--------------------------------------------------------------------------------
Long: Aggressive     | 6.81%      | ✅ BEAT MKT
Long: Safe           | 6.27%      | ✅ BEAT MKT
Long: Equal          | 5.51%      | ✅ BEAT MKT
L/S: Aggressive      | 4.23%      | ✅ BEAT MKT
L/S: Safe            | 3.02%      | ✅ BEAT MKT
L/S: Equal           | 2.72%      | ✅ BEAT MKT
Benchmark            | -1.25%     | ---
--------------------------------------------------------------------------------
No description has been provided for this image
🏆 WINNER (Last Month): Long: Aggressive

Next block acts as your "Quality Control" department.¶

It answers two critical questions before you risk real money:

Are these trends real? (Hurst Exponent)¶

We want to buy stocks that are mathematically "Persistent" (trending). If the Hurst Exponent is 0.5, the price movement is a random walk (gambling).

The trend-detection module of this algorithm relies on Rescaled Range (R/S) Analysis, a statistical method originally developed by Hurst (1951) to measure the long-term persistence of time series data. Although traditional finance assumes asset prices follow a Random Walk, empirical research by Mandelbrot (1963) and Peters (1994) demonstrates that financial markets exhibit 'Long Memory' effects (Macrosynergy, 2023). As proved by Mitra et al. (2012) finding a high value of H exponent indicate the presence of long memory in the time series, in which case future value will depend partially on past values of the series. In other worlds the past price is an indicator of future performance (in the short term).

Are we doubling up on risk? (Correlation)¶

If you buy 5 different stocks but they all have a 95% correlation (e.g., 5 Energy stocks), you haven't diversified; you've just placed one giant bet.

Macrosynergy (2023). Detecting trends and mean reversion with the Hurst exponent | Macrosynergy. [online] Macrosynergy. Available at: https://macrosynergy.com/research/detecting-trends-and-mean-reversion-with-the-hurst-exponent/. Mitra, Suman. (2012). Is Hurst Exponent Value Useful in Forecasting Financial Time Series?. Asian Social Science. 8. 111-111. 10.5539/ass.v8n8p111.

In [18]:
# --- BLOCK 15: STATISTICAL VALIDATION (HURST & CORRELATION) ---
print("\n" + "="*80)
print(f"{'STATISTICAL VALIDATION: ARE THESE TRENDS ROBUST?':^80}")
print("="*80)

# 1. SETUP & DEPENDENCY CHECK
# We prioritize the Safety Containers to audit the full list
validation_universe = []
active_longs = []
active_shorts = []

if 'container_longs' in locals():
    active_longs = container_longs
    print(f"📥 Loaded {len(active_longs)} Longs from Block 4 (Safety Container).")
elif 'final_longs' in locals():
    active_longs = final_longs
    print(f"📥 Loaded {len(active_longs)} Longs from Block 4 (Live Variable).")

if 'container_shorts' in locals():
    active_shorts = container_shorts
    print(f"📥 Loaded {len(active_shorts)} Shorts from Block 4 (Safety Container).")
elif 'final_shorts' in locals():
    active_shorts = final_shorts
    print(f"📥 Loaded {len(active_shorts)} Shorts from Block 4 (Live Variable).")

validation_universe = list(set(active_longs + active_shorts))

if not validation_universe:
    print("⚠️ No tickers found. Please run Block 4 first.")
else:
    print(f"🔬 Analyzing {len(validation_universe)} candidates for statistical robustness...")

    # 2. HELPER: HURST EXPONENT
    # H < 0.5 = Mean Reverting (Choppy)
    # H = 0.5 = Random Walk (Unpredictable)
    # H > 0.5 = Trending (Persistent)
    def get_hurst_exponent(price_series):
        try:
            lags = range(2, 20) 
            
            # Calculate volatility at different lags
            tau = [np.sqrt(np.std(np.subtract(price_series[lag:], price_series[:-lag]))) for lag in lags]
            
            # log-log plot fit
            poly = np.polyfit(np.log(lags), np.log(tau), 1)
            return poly[0] * 2.0
        except:
            return 0.5 

    # 3. RUN STATISTICS
    stats_data = []
    
    # Get last 1 year of data for the check
    subset_prices = prices_stocks[validation_universe].iloc[-252:]
    
    for t in validation_universe:
        prices = subset_prices[t].dropna()
        if len(prices) < 100: continue
            
        # A. Hurst (Trend Quality)
        h_val = get_hurst_exponent(prices.values)
        
        # B. Volatility (Risk)
        vol = prices.pct_change().std() * np.sqrt(252)
        
        # C. Side
        side = "LONG" if t in active_longs else "SHORT"
        
        # D. Verdict
        if h_val > 0.55: quality = "✅ Strong Trend"
        elif h_val < 0.45: quality = "⚠️ Mean Reverting"
        else: quality = "🎲 Random Walk"
            
        stats_data.append({
            'Ticker': t, 'Side': side, 'Hurst': h_val, 
            'Vol': vol, 'Quality': quality
        })
        
    df_stats = pd.DataFrame(stats_data).set_index('Ticker')
    df_stats = df_stats.sort_values(by='Hurst', ascending=False)

    # 4. DISPLAY RESULTS
    print(f"\n{'TICKER':<10} | {'SIDE':<5} | {'HURST':<6} | {'VOL':<8} | {'VERDICT'}")
    print("-" * 65)
    for t, row in df_stats.iterrows():
        # Highlighting the Best Trends
        h_str = f"{row['Hurst']:.2f}"
        if row['Hurst'] > 0.6: h_str += " ★"
        
        print(f"{t:<10} | {row['Side']:<5} | {h_str:<6} | {row['Vol']:<8.1%} | {row['Quality']}")
    print("-" * 65)
    print("NOTE: We want Hurst > 0.5. Values near 0.5 imply gambling (randomness).")

    # 5. DIVERSIFICATION CHECK (Correlation Matrix)
    print("\n🔍 DIVERSIFICATION CHECK (Correlation Heatmap)...")
    
    corr_matrix = subset_prices.pct_change().corr()
    
    plt.figure(figsize=(12, 10))
    # This generates the visual heatmap
    sns.heatmap(corr_matrix, cmap='coolwarm', center=0, annot=False, square=True)
    plt.title("Portfolio Correlation Matrix\n(Red = Dangerous Overlap)", fontsize=12, fontweight='bold')
    plt.tight_layout()
    plt.show()

    # 6. RISK WARNING
    high_corr_pairs = []
    keys = corr_matrix.columns
    for i in range(len(keys)):
        for j in range(i+1, len(keys)):
            if abs(corr_matrix.iloc[i, j]) > 0.85:
                high_corr_pairs.append((keys[i], keys[j], corr_matrix.iloc[i, j]))
                
    if high_corr_pairs:
        print("\n⚠️ CONCENTRATION RISK DETECTED:")
        for a, b, c in high_corr_pairs[:10]: # Limit output to 10 pairs
            print(f"   • {a} and {b} are highly correlated ({c:.2f}). Consider removing one.")
        if len(high_corr_pairs) > 10:
            print(f"   ... and {len(high_corr_pairs)-10} more pairs.")
    else:
        print("\n✅ Good Diversification: No distinct pairs have > 85% correlation.")
================================================================================
                STATISTICAL VALIDATION: ARE THESE TRENDS ROBUST?                
================================================================================
📥 Loaded 43 Longs from Block 4 (Safety Container).
📥 Loaded 24 Shorts from Block 4 (Safety Container).
🔬 Analyzing 67 candidates for statistical robustness...

TICKER     | SIDE  | HURST  | VOL      | VERDICT
-----------------------------------------------------------------
CAH        | LONG  | 0.59   | 27.4%    | ✅ Strong Trend
TPR        | LONG  | 0.58   | 43.5%    | ✅ Strong Trend
IT         | SHORT | 0.56   | 42.7%    | ✅ Strong Trend
CHTR       | SHORT | 0.56   | 38.0%    | ✅ Strong Trend
CMI        | LONG  | 0.55   | 31.6%    | ✅ Strong Trend
DECK       | SHORT | 0.55   | 51.2%    | 🎲 Random Walk
AMAT       | LONG  | 0.54   | 46.4%    | 🎲 Random Walk
LULU       | SHORT | 0.54   | 49.8%    | 🎲 Random Walk
INCY       | LONG  | 0.53   | 34.8%    | 🎲 Random Walk
FDS        | SHORT | 0.53   | 29.1%    | 🎲 Random Walk
DLTR       | LONG  | 0.53   | 42.8%    | 🎲 Random Walk
AMD        | LONG  | 0.53   | 60.0%    | 🎲 Random Walk
IVZ        | LONG  | 0.53   | 39.4%    | 🎲 Random Walk
PCG        | SHORT | 0.53   | 27.9%    | 🎲 Random Walk
HPQ        | SHORT | 0.53   | 37.6%    | 🎲 Random Walk
GOOGL      | LONG  | 0.53   | 32.8%    | 🎲 Random Walk
GOOG       | LONG  | 0.52   | 32.4%    | 🎲 Random Walk
CDW        | SHORT | 0.52   | 32.7%    | 🎲 Random Walk
TEL        | LONG  | 0.52   | 31.2%    | 🎲 Random Walk
MU         | LONG  | 0.51   | 61.9%    | 🎲 Random Walk
JNJ        | LONG  | 0.51   | 19.2%    | 🎲 Random Walk
WELL       | LONG  | 0.51   | 21.8%    | 🎲 Random Walk
CRM        | SHORT | 0.50   | 33.2%    | 🎲 Random Walk
NOW        | SHORT | 0.50   | 39.5%    | 🎲 Random Walk
INTC       | LONG  | 0.50   | 65.2%    | 🎲 Random Walk
LRCX       | LONG  | 0.49   | 50.2%    | 🎲 Random Walk
GS         | LONG  | 0.49   | 31.9%    | 🎲 Random Walk
GLW        | LONG  | 0.49   | 35.7%    | 🎲 Random Walk
XYZ        | SHORT | 0.49   | 52.8%    | 🎲 Random Walk
JBL        | LONG  | 0.48   | 41.2%    | 🎲 Random Walk
COR        | LONG  | 0.48   | 22.2%    | 🎲 Random Walk
EL         | LONG  | 0.48   | 46.1%    | 🎲 Random Walk
TER        | LONG  | 0.48   | 59.1%    | 🎲 Random Walk
CAT        | LONG  | 0.47   | 32.6%    | 🎲 Random Walk
MPWR       | LONG  | 0.47   | 56.7%    | 🎲 Random Walk
ZBRA       | SHORT | 0.47   | 45.9%    | 🎲 Random Walk
HWM        | LONG  | 0.47   | 34.7%    | 🎲 Random Walk
MS         | LONG  | 0.46   | 31.6%    | 🎲 Random Walk
GPN        | SHORT | 0.46   | 40.5%    | 🎲 Random Walk
BK         | LONG  | 0.46   | 23.5%    | 🎲 Random Walk
ALB        | LONG  | 0.46   | 61.3%    | 🎲 Random Walk
DG         | LONG  | 0.46   | 37.1%    | 🎲 Random Walk
TTD        | SHORT | 0.46   | 75.2%    | 🎲 Random Walk
CVNA       | LONG  | 0.45   | 72.1%    | 🎲 Random Walk
HAS        | LONG  | 0.44   | 35.7%    | ⚠️ Mean Reverting
PAYC       | SHORT | 0.44   | 35.6%    | ⚠️ Mean Reverting
APH        | LONG  | 0.44   | 34.5%    | ⚠️ Mean Reverting
CHRW       | LONG  | 0.43   | 37.7%    | ⚠️ Mean Reverting
WDC        | LONG  | 0.43   | 59.3%    | ⚠️ Mean Reverting
KLAC       | LONG  | 0.43   | 44.7%    | ⚠️ Mean Reverting
PYPL       | SHORT | 0.43   | 34.7%    | ⚠️ Mean Reverting
IDXX       | LONG  | 0.42   | 42.5%    | ⚠️ Mean Reverting
STX        | LONG  | 0.40   | 56.4%    | ⚠️ Mean Reverting
BRO        | SHORT | 0.39   | 25.6%    | ⚠️ Mean Reverting
SWKS       | SHORT | 0.39   | 51.0%    | ⚠️ Mean Reverting
FIX        | LONG  | 0.39   | 55.8%    | ⚠️ Mean Reverting
MNST       | LONG  | 0.38   | 22.7%    | ⚠️ Mean Reverting
FISV       | SHORT | 0.38   | 59.2%    | ⚠️ Mean Reverting
CPB        | SHORT | 0.38   | 28.0%    | ⚠️ Mean Reverting
DVA        | SHORT | 0.37   | 31.1%    | ⚠️ Mean Reverting
GDDY       | SHORT | 0.36   | 33.2%    | ⚠️ Mean Reverting
HII        | LONG  | 0.35   | 38.4%    | ⚠️ Mean Reverting
CVS        | LONG  | 0.34   | 30.8%    | ⚠️ Mean Reverting
NEM        | LONG  | 0.32   | 41.1%    | ⚠️ Mean Reverting
RTX        | LONG  | 0.31   | 28.8%    | ⚠️ Mean Reverting
TYL        | SHORT | 0.30   | 27.6%    | ⚠️ Mean Reverting
GIS        | SHORT | 0.21   | 23.3%    | ⚠️ Mean Reverting
-----------------------------------------------------------------
NOTE: We want Hurst > 0.5. Values near 0.5 imply gambling (randomness).

🔍 DIVERSIFICATION CHECK (Correlation Heatmap)...
No description has been provided for this image
⚠️ CONCENTRATION RISK DETECTED:
   • GOOGL and GOOG are highly correlated (1.00). Consider removing one.
   • LRCX and KLAC are highly correlated (0.91). Consider removing one.
   • LRCX and AMAT are highly correlated (0.88). Consider removing one.
   • MS and GS are highly correlated (0.92). Consider removing one.
   • STX and WDC are highly correlated (0.87). Consider removing one.
   • KLAC and AMAT are highly correlated (0.90). Consider removing one.

WHAT ABOUT USING MACHINE LEARNING TO PREDICT ACCURACY IN SIGNALS OF SECURITIES ?¶

According to Breiman (2001), Random Forests avoid the common pitfall of overfitting by leveraging the Law of Large Numbers. The algorithm achieves accuracy by injecting randomness into the training process. Additionally, it allows for self-validation through 'out-of-bag' estimation, providing concrete data on the model's predictive strength. Following the methodology of Khaidem et al. (2016), feature vectors were constructed using technical indicators (RSI, Volatility, SMA Distance) to capture non-linear market dynamics. I used this methodology to scan the sotck in my candicades list and see which stock is more predictable, so to use this filtered universe to optimise the portfolios.

Breiman, L., 2001. Random forests. Machine learning, 45(1), pp.5-32. Khaidem, L., Saha, S. and Dey, Sudeepa Roy (2016). Predicting the direction of stock market prices using random forest. [online] arXiv.org. Available at: https://arxiv.org/abs/1605.00003.

In [19]:
# --- BLOCK 16: AUTOMATED ML AUDIT (FULL UNIVERSE SCAN) ---
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

print("\n" + "="*80)
print(f"{'ML VALIDATION: AUDITING THE FULL STOCK UNIVERSE':^80}")
print("="*80)

# 1. ROBUST CANDIDATE LOADING (Fail-Safe)
active_tickers = set()

# A. Try Loading from Safety Containers (Best Source)
if 'container_longs' in locals():
    active_tickers.update(container_longs)
    if 'container_shorts' in locals(): active_tickers.update(container_shorts)
    print(f"📥 Loaded {len(active_tickers)} candidates from Long and Short Candidates.")

# B. Try Loading from Live Lists (Second Best)
elif 'final_longs' in locals():
    active_tickers.update(final_longs)
    if 'final_shorts' in locals(): active_tickers.update(final_shorts)
    print(f"📥 Loaded {len(active_tickers)} candidates from Live Lists.")

# C. Emergency Fallback (If lists are missing, re-scan Top 50 Momentum)
else:
    print("⚠️ No candidate lists found. Performing emergency Momentum Scan...")
    if 'prices_stocks' in locals():
        # Quick Momentum Calc
        mom = (prices_stocks.iloc[-1] / prices_stocks.iloc[-252]) - 1
        top_50 = mom.nlargest(50).index.tolist()
        active_tickers.update(top_50)
        print(f"📥 Auto-generated {len(active_tickers)} candidates based on Momentum.")
    else:
        print("❌ Critical Error: 'prices_stocks' data is missing. Cannot audit.")

all_tickers = list(active_tickers)

if not all_tickers:
    print("❌ No tickers to audit.")
else:
    print(f"🔎 STARTING AUDIT ON {len(all_tickers)} STOCKS...")
    print(f"   (Training AI models for each ticker...)")

    # 2. DEFINE THE ML ENGINE
    def run_ml_audit(ticker):
        try:
            # A. Setup Data
            df = pd.DataFrame(index=prices_stocks.index)
            df['Close'] = prices_stocks[ticker]
            
            # B. Feature Engineering
            # 1. RSI (Momentum)
            delta = df['Close'].diff()
            gain = (delta.where(delta > 0, 0)).rolling(14).mean()
            loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
            rs = gain / (loss + 1e-6)
            df['RSI'] = 100 - (100 / (1 + rs))
            
            # 2. Trend Distance (Mean Reversion)
            df['SMA_50'] = df['Close'].rolling(50).mean()
            df['Dist_SMA50'] = (df['Close'] / df['SMA_50']) - 1
            
            # 3. Volatility (Risk)
            df['Vol_20D'] = df['Close'].pct_change().rolling(20).std()
            
            # C. Target (Next 5 Days Direction)
            df['Target'] = np.where(df['Close'].shift(-5) > df['Close'], 1, 0)
            df = df.dropna()
            
            # D. Train/Test Split
            features = ['RSI', 'Dist_SMA50', 'Vol_20D']
            X = df[features]
            y = df['Target']
            
            split = int(len(df) * 0.8)
            if split < 50: return 0.0, "Insufficient Data"
            
            X_train, X_test = X.iloc[:split], X.iloc[split:]
            y_train, y_test = y.iloc[:split], y.iloc[split:]
            
            # E. Model
            model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
            model.fit(X_train, y_train)
            
            # F. Predict & Score
            preds = model.predict(X_test)
            acc = accuracy_score(y_test, preds)
            top_feature = features[np.argmax(model.feature_importances_)]
            
            return acc, top_feature
            
        except Exception as e:
            return 0.0, "Error"

    # 3. RUN THE LOOP
    results = []
    print(f"\n{'TICKER':<10} | {'ACCURACY':<10} | {'PRIMARY DRIVER':<15} | {'VERDICT'}")
    print("-" * 80)

    for t in all_tickers:
        acc, driver = run_ml_audit(t)
        
        if acc > 0.55: verdict = "⭐⭐ HIGH SIGNAL"
        elif acc > 0.51: verdict = "⭐ EDGE FOUND"
        elif acc > 0.49: verdict = "⚪ RANDOM NOISE"
        else: verdict = "❌ TRAP"
        
        # Only print if it's NOT random noise to save screen space
        if acc > 0.51 or acc < 0.49:
             print(f"{t:<10} | {acc:.2%}      | {driver:<15} | {verdict}")
        
        results.append({'Ticker': t, 'Accuracy': acc, 'Top_Feature': driver})

    # 4. EXECUTIVE SUMMARY
    if results:
        df_res = pd.DataFrame(results).sort_values("Accuracy", ascending=False)
        
        # Save results for Block 17
        ml_results_df = df_res.copy()
        
        print("-" * 80)
        print(f"✅ AUDIT COMPLETE.")
        print(f"   • Total Scanned: {len(all_tickers)}")
        print(f"   • Predictable (>51%): {len(df_res[df_res['Accuracy'] > 0.51])}")
        print(f"   • Random/Noisy (49-51%): {len(df_res[(df_res['Accuracy'] >= 0.49) & (df_res['Accuracy'] <= 0.51)])}")
        print("=" * 80)
================================================================================
                ML VALIDATION: AUDITING THE FULL STOCK UNIVERSE                 
================================================================================
📥 Loaded 67 candidates from Long and Short Candidates.
🔎 STARTING AUDIT ON 67 STOCKS...
   (Training AI models for each ticker...)

TICKER     | ACCURACY   | PRIMARY DRIVER  | VERDICT
--------------------------------------------------------------------------------
GOOGL      | 54.07%      | Vol_20D         | ⭐ EDGE FOUND
FISV       | 46.91%      | Vol_20D         | ❌ TRAP
HII        | 56.03%      | Vol_20D         | ⭐⭐ HIGH SIGNAL
CMI        | 53.42%      | Vol_20D         | ⭐ EDGE FOUND
BK         | 58.63%      | Dist_SMA50      | ⭐⭐ HIGH SIGNAL
MU         | 48.21%      | Dist_SMA50      | ❌ TRAP
IDXX       | 53.09%      | RSI             | ⭐ EDGE FOUND
DECK       | 48.21%      | Dist_SMA50      | ❌ TRAP
RTX        | 62.54%      | Dist_SMA50      | ⭐⭐ HIGH SIGNAL
COR        | 53.09%      | Vol_20D         | ⭐ EDGE FOUND
LULU       | 48.21%      | Vol_20D         | ❌ TRAP
LRCX       | 51.47%      | RSI             | ⭐ EDGE FOUND
INCY       | 52.77%      | Vol_20D         | ⭐ EDGE FOUND
HPQ        | 45.60%      | Dist_SMA50      | ❌ TRAP
GIS        | 56.35%      | Dist_SMA50      | ⭐⭐ HIGH SIGNAL
HWM        | 59.61%      | Vol_20D         | ⭐⭐ HIGH SIGNAL
INTC       | 52.77%      | RSI             | ⭐ EDGE FOUND
PCG        | 48.86%      | Vol_20D         | ❌ TRAP
IVZ        | 56.35%      | Vol_20D         | ⭐⭐ HIGH SIGNAL
TPR        | 58.96%      | Dist_SMA50      | ⭐⭐ HIGH SIGNAL
TTD        | 43.00%      | Dist_SMA50      | ❌ TRAP
CAH        | 51.79%      | Vol_20D         | ⭐ EDGE FOUND
CHTR       | 40.39%      | Vol_20D         | ❌ TRAP
MS         | 47.23%      | Vol_20D         | ❌ TRAP
PAYC       | 46.91%      | Dist_SMA50      | ❌ TRAP
JBL        | 53.75%      | RSI             | ⭐ EDGE FOUND
IT         | 37.79%      | RSI             | ❌ TRAP
DLTR       | 54.07%      | Vol_20D         | ⭐ EDGE FOUND
FDS        | 52.44%      | Vol_20D         | ⭐ EDGE FOUND
CVS        | 57.33%      | Dist_SMA50      | ⭐⭐ HIGH SIGNAL
GPN        | 44.95%      | Dist_SMA50      | ❌ TRAP
NEM        | 55.70%      | RSI             | ⭐⭐ HIGH SIGNAL
AMD        | 52.77%      | Vol_20D         | ⭐ EDGE FOUND
WELL       | 57.00%      | Dist_SMA50      | ⭐⭐ HIGH SIGNAL
CVNA       | 46.25%      | Vol_20D         | ❌ TRAP
MNST       | 56.03%      | Vol_20D         | ⭐⭐ HIGH SIGNAL
ZBRA       | 47.23%      | Vol_20D         | ❌ TRAP
MPWR       | 57.00%      | Vol_20D         | ⭐⭐ HIGH SIGNAL
DG         | 53.42%      | Vol_20D         | ⭐ EDGE FOUND
JNJ        | 44.63%      | RSI             | ❌ TRAP
ALB        | 52.12%      | Vol_20D         | ⭐ EDGE FOUND
WDC        | 42.35%      | Vol_20D         | ❌ TRAP
CAT        | 56.03%      | Dist_SMA50      | ⭐⭐ HIGH SIGNAL
HAS        | 55.70%      | Dist_SMA50      | ⭐⭐ HIGH SIGNAL
GDDY       | 43.65%      | Vol_20D         | ❌ TRAP
FIX        | 55.37%      | Dist_SMA50      | ⭐⭐ HIGH SIGNAL
CPB        | 41.04%      | Vol_20D         | ❌ TRAP
XYZ        | 53.42%      | Dist_SMA50      | ⭐ EDGE FOUND
EL         | 51.47%      | Dist_SMA50      | ⭐ EDGE FOUND
CRM        | 46.58%      | Dist_SMA50      | ❌ TRAP
TYL        | 47.88%      | Vol_20D         | ❌ TRAP
BRO        | 46.58%      | Vol_20D         | ❌ TRAP
KLAC       | 56.35%      | RSI             | ⭐⭐ HIGH SIGNAL
CDW        | 45.93%      | Dist_SMA50      | ❌ TRAP
AMAT       | 46.25%      | Dist_SMA50      | ❌ TRAP
APH        | 55.05%      | RSI             | ⭐⭐ HIGH SIGNAL
GS         | 59.93%      | Dist_SMA50      | ⭐⭐ HIGH SIGNAL
--------------------------------------------------------------------------------
✅ AUDIT COMPLETE.
   • Total Scanned: 67
   • Predictable (>51%): 34
   • Random/Noisy (49-51%): 10
================================================================================

I will now present a portfolio long-short that uses only those stock with a predictability score higher that 50%. Then run the optimisation to get the max Sharpe portfolio.

In [20]:
# --- BLOCK 17: ML-ENHANCED FINAL OPTIMIZATION (FULL UNIVERSE LINKED) ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as sco

print("\n" + "="*80)
print(f"{'FINAL STEP: GENERATING THE ML-VERIFIED PORTFOLIOS':^80}")
print("="*80)

# 1. SETUP & DATA COLLECTION
# We need the ML scores (results) and the Full Candidate Lists (container_longs)
if 'results' not in locals():
    print("❌ Error: ML Audit results missing. Please run Block 16 first.")
else:
    # Convert ML results to dictionary for fast lookup
    # 'results' is the list of dicts created in Block 16
    ml_scores = {item['Ticker']: item['Accuracy'] for item in results}

    # 2. FILTER: SURVIVAL OF THE FITTEST (>50% Accuracy)
    print(f"Applying Filter: Rejecting any stock with Predictability <= 50%...")
    
    # Identify Original Candidates (Try Safety Containers first, then live vars)
    if 'container_longs' in locals():
        orig_longs = container_longs
        orig_shorts = container_shorts
    elif 'final_longs' in locals():
        orig_longs = final_longs
        orig_shorts = final_shorts
    else:
        orig_longs = []
        orig_shorts = []
        print("⚠️ Warning: No candidate lists found from Block 4.")

    survivor_longs = []
    survivor_shorts = []
    rejected = []

    # Filter Longs
    for t in orig_longs:
        score = ml_scores.get(t, 0.50) # Default to 0.50 if missing
        if score > 0.50:
            survivor_longs.append(t)
        else:
            rejected.append(f"{t}")

    # Filter Shorts
    for t in orig_shorts:
        score = ml_scores.get(t, 0.50)
        if score > 0.50:
            survivor_shorts.append(t)
        else:
            rejected.append(f"{t}")

    print(f"\n🗑️  REJECTED ({len(rejected)}): {', '.join(rejected[:10])}..." if rejected else "   All stocks passed!")
    print(f"✅ SURVIVORS: {len(survivor_longs)} Longs, {len(survivor_shorts)} Shorts")

    # 3. RE-OPTIMIZATION ENGINE
    def run_smart_optimizer(tickers, side="LONG"):
        if not tickers: return pd.Series(dtype=float)
        
        # SMART SAMPLING:
        # If we have too many survivors (e.g., 40), the optimizer might act weird.
        # We select the Top 15 Survivors based on pure Momentum to feed into the optimizer.
        subset_mom = (prices_stocks[tickers].iloc[-1] / prices_stocks[tickers].iloc[-252]) - 1
        
        if side == "LONG":
            # Best 15 Winners
            best_survivors = subset_mom.nlargest(min(15, len(tickers))).index.tolist()
        else:
            # Worst 15 Losers (Best Shorts)
            best_survivors = subset_mom.nsmallest(min(15, len(tickers))).index.tolist()
            
        # Get Data for optimization
        subset = prices_stocks[best_survivors].iloc[-252:]
        rets = subset.pct_change(fill_method=None).fillna(0)
        
        if side == "SHORT": rets = rets * -1
            
        mu = rets.mean() * 252
        sigma = rets.cov() * 252
        n = len(best_survivors)
        rf = 0.04
        
        # Max Sharpe Optimization
        def neg_sharpe(w):
            p_ret = np.sum(mu * w)
            p_vol = np.sqrt(np.dot(w.T, np.dot(sigma, w)))
            if p_vol == 0: return 0
            return - (p_ret - rf) / p_vol

        try:
            cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
            bnds = tuple((0.0, 1.0) for _ in range(n))
            init_guess = [1/n] * n
            res = sco.minimize(neg_sharpe, init_guess, method='SLSQP', bounds=bnds, constraints=cons)
            return pd.Series(res.x, index=best_survivors)
        except:
            return pd.Series(1/n, index=best_survivors)

    print("\n... Re-calculating Optimal Weights for Survivors ...")
    
    # Optimize Longs
    w_smart_long = run_smart_optimizer(survivor_longs, "LONG")
    
    # Optimize Shorts
    w_smart_short = run_smart_optimizer(survivor_shorts, "SHORT")

    # 4. FINAL EXECUTION REPORT
    print("\n" + "="*80)
    print(f"{'🚀 FINAL EXECUTION LIST (ML-VERIFIED) 🚀':^80}")
    print("="*80)

    # REPORT A: SMART LONG PORTFOLIO
    if not w_smart_long.empty:
        print(f"\n>> SMART LONG PORTFOLIO (High Confidence Winners)")
        print("-" * 80)
        print(f"{'TICKER':<10} | {'ALLOCATION':<12} | {'ML SCORE':<10} | {'STATUS'}")
        print("-" * 80)
        
        sorted_lo = w_smart_long[w_smart_long > 0.001].sort_values(ascending=False)
        for t, w in sorted_lo.items():
            score = ml_scores.get(t, 0.0)
            print(f"{t:<10} | {w:.2%}      | {score:.1%}      | Trusted ✅")
        print("-" * 80)
        print("Note: Percentages sum to 100% of your LONG capital.")

    # REPORT B: SMART SHORT PORTFOLIO
    if not w_smart_short.empty:
        print(f"\n>> SMART SHORT PORTFOLIO (Predictable Losers)")
        print("-" * 80)
        print(f"{'TICKER':<10} | {'ALLOCATION':<12} | {'ML SCORE':<10} | {'STATUS'}")
        print("-" * 80)
        
        sorted_sh = w_smart_short[w_smart_short > 0.001].sort_values(ascending=False)
        for t, w in sorted_sh.items():
            score = ml_scores.get(t, 0.0)
            print(f"{t:<10} | {w:.2%}      | {score:.1%}      | Trusted ✅")
        print("-" * 80)
        print("Note: Percentages sum to 100% of your SHORT capital.")

    # 5. VISUAL COMPARISON (Impact of ML)
    if not w_smart_long.empty:
        top_pick = sorted_lo.index[0]
        
        # Simple Before/After comparison
        # Before ML, we assume an equal weight across the original broad list (~1/47)
        N_original = len(orig_longs) if orig_longs else 1
        old_w = 1.0 / N_original
        new_w = sorted_lo.iloc[0]
        
        plt.figure(figsize=(8, 4))
        plt.bar(["Before ML (1/N)", "After ML (Optimized)"], [old_w, new_w], color=['gray', '#00c853'])
        plt.title(f"Impact of ML Filtering on Top Pick: {top_pick}")
        plt.ylabel(f"Allocation Weight")
        plt.grid(axis='y', alpha=0.3)
        plt.show()
        
        print("\n💡 VERDICT:")
        print("The optimizer has concentrated your capital.")
        print(f"Instead of buying {N_original} stocks blindly, you are betting big on the survivors.")
================================================================================
               FINAL STEP: GENERATING THE ML-VERIFIED PORTFOLIOS                
================================================================================
Applying Filter: Rejecting any stock with Predictability <= 50%...

🗑️  REJECTED (29): WDC, MU, CVNA, GLW, TEL, GOOG, CHRW, AMAT, TER, JNJ...
✅ SURVIVORS: 32 Longs, 6 Shorts

... Re-calculating Optimal Weights for Survivors ...

================================================================================
                     🚀 FINAL EXECUTION LIST (ML-VERIFIED) 🚀                     
================================================================================

>> SMART LONG PORTFOLIO (High Confidence Winners)
--------------------------------------------------------------------------------
TICKER     | ALLOCATION   | ML SCORE   | STATUS
--------------------------------------------------------------------------------
DG         | 29.03%      | 53.4%      | Trusted ✅
NEM        | 21.45%      | 55.7%      | Trusted ✅
HII        | 14.37%      | 56.0%      | Trusted ✅
STX        | 12.78%      | 50.8%      | Trusted ✅
INTC       | 7.80%      | 52.8%      | Trusted ✅
APH        | 7.24%      | 55.0%      | Trusted ✅
FIX        | 5.71%      | 55.4%      | Trusted ✅
TPR        | 0.92%      | 59.0%      | Trusted ✅
LRCX       | 0.70%      | 51.5%      | Trusted ✅
--------------------------------------------------------------------------------
Note: Percentages sum to 100% of your LONG capital.

>> SMART SHORT PORTFOLIO (Predictable Losers)
--------------------------------------------------------------------------------
TICKER     | ALLOCATION   | ML SCORE   | STATUS
--------------------------------------------------------------------------------
FDS        | 46.61%      | 52.4%      | Trusted ✅
DVA        | 40.44%      | 50.2%      | Trusted ✅
GIS        | 11.83%      | 56.4%      | Trusted ✅
PYPL       | 1.12%      | 50.8%      | Trusted ✅
--------------------------------------------------------------------------------
Note: Percentages sum to 100% of your SHORT capital.
No description has been provided for this image
💡 VERDICT:
The optimizer has concentrated your capital.
Instead of buying 43 stocks blindly, you are betting big on the survivors.
In [21]:
# --- BLOCK 18: VERIFICATION (FULL PERFORMANCE AUDIT WITH WEIGHTS) ---
print("\n" + "="*80)
print(f"{'REALITY CHECK: RECENT PERFORMANCE OF ML PICKS':^80}")
print("="*80)

# 1. SETUP VERIFICATION WINDOW (Last 21 Days)
if 'prices_stocks' in locals() and len(prices_stocks) > 25:
    end_date = prices_stocks.index[-1]
    start_date = prices_stocks.index[-21] # Go back exactly 21 trading days
    
    print(f"⚙️ VERIFICATION WINDOW:")
    print(f"   • Checking trend from: {start_date.date()}")
    print(f"   • To latest close:     {end_date.date()}")
    print("   • Goal: Did the 'Survivors' actually outperform?")

    # 2. RETRIEVE & CONSTRUCT PORTFOLIOS
    portfolios = {}
    
    # A. Strategy 1: Smart Long-Only
    if 'w_smart_long' in locals() and not w_smart_long.empty:
        portfolios['Strategy 1 (Smart Long)'] = w_smart_long
    elif 'w_forecast_lo' in locals():
        portfolios['Strategy 1 (Forecast)'] = w_forecast_lo

    # B. Strategy 2: Smart Market Neutral
    # We combine Longs and Shorts if both exist
    if 'w_smart_long' in locals() and 'w_smart_short' in locals():
        if not w_smart_long.empty and not w_smart_short.empty:
            # Combine: 50% Long, 50% Short (Dollar Neutral)
            w_neutral = (w_smart_long * 0.5).add(w_smart_short * -0.5, fill_value=0)
            portfolios['Strategy 2 (Smart Hedge)'] = w_neutral

    if not portfolios:
        print("⚠️ No portfolios found. Please run Block 17 first.")
    else:
        # 3. RUN PERFORMANCE TEST
        # Slice data to the verification window
        test_prices = prices_stocks.loc[start_date:end_date]
        test_rets = test_prices.pct_change().fillna(0)
        
        # Benchmark (S&P 500)
        try:
            spy_ver = yf.download("SPY", start=start_date, end=end_date, progress=False, auto_adjust=True)
            # Handle yfinance MultiIndex issue
            if isinstance(spy_ver.columns, pd.MultiIndex):
                spy_ver = spy_ver.xs('Close', axis=1, level=0).iloc[:, 0]
            elif 'Close' in spy_ver.columns:
                spy_ver = spy_ver['Close']
            
            spy_ver = spy_ver.reindex(test_prices.index).ffill()
            spy_curve = 10000 * (1 + spy_ver.pct_change().fillna(0)).cumprod()
            spy_total = (spy_curve.iloc[-1]/10000) - 1
        except:
            spy_curve = pd.Series(10000, index=test_prices.index)
            spy_total = 0.0

        # Plotting Setup
        plt.figure(figsize=(12, 6))
        plt.plot(spy_curve, color='gray', linestyle='--', label='S&P 500 (Benchmark)', alpha=0.6)

        print("\n" + "-"*60)
        print(f"{'STRATEGY':<30} | {'RETURN (21d)':<12} | {'ALPHA'}")
        print("-" * 60)

        # Loop through strategies
        for name, weights in portfolios.items():
            # Align weights to available data
            valid_assets = [t for t in weights.index if t in test_rets.columns]
            aligned_w = weights[valid_assets].reindex(test_rets.columns).fillna(0)
            
            # Calculate Return
            # For Market Neutral, add Risk Free Rate (approx 4%/year)
            rf_adder = (0.04/252) if "Hedge" in name else 0.0
            
            strat_daily = test_rets.dot(aligned_w) + rf_adder
            strat_curve = 10000 * (1 + strat_daily).cumprod()
            strat_total = (strat_curve.iloc[-1]/10000) - 1
            
            # Print Stats
            alpha = strat_total - spy_total
            sign = "+" if alpha > 0 else ""
            print(f"{name:<30} | {strat_total:+.2%}      | {sign}{alpha:+.2%}")
            
            # Plot
            color = '#00c853' if "Long" in name else '#6200ea' # Green for Long, Purple for Hedge
            plt.plot(strat_curve, linewidth=2, label=name, color=color)

        print("-" * 60)
        print(f"Benchmark (S&P 500)            | {spy_total:+.2%}      | 0.00%")
        print("-" * 60)

        plt.title(f"Truth Test: Out-of-Sample Performance\n({start_date.date()} to {end_date.date()})")
        plt.ylabel("Portfolio Value (£)")
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()

        # 4. DEEP DIVE: INDIVIDUAL BREAKDOWNS
        asset_rets = (test_prices.iloc[-1] / test_prices.iloc[0]) - 1

        # A. STRATEGY 1 CHECK (LONG ONLY)
        if 'Strategy 1 (Smart Long)' in portfolios:
            w_check = portfolios['Strategy 1 (Smart Long)']
            print(f"\n>> BREAKDOWN: STRATEGY 1 (LONG ONLY)")
            print(f"   Goal: Price should go UP (+)")
            print("-" * 75)
            print(f"   {'TICKER':<10} | {'WEIGHT':<10} | {'PRICE MOVE':<12} | {'RESULT'}")
            print("-" * 75)
            
            sorted_w = w_check.sort_values(ascending=False)
            for t, w in sorted_w.items():
                if t in asset_rets:
                    perf = asset_rets[t]
                    status = "✅ PROFIT" if perf > 0 else "❌ LOSS"
                    print(f"   {t:<10} | {w:.2%}      | {perf:+.2%}      | {status}")
            print("-" * 75)

        # B. STRATEGY 2 CHECK (HEDGE)
        if 'Strategy 2 (Smart Hedge)' in portfolios:
            w_check = portfolios['Strategy 2 (Smart Hedge)']
            print(f"\n>> BREAKDOWN: STRATEGY 2 (HEDGED)")
            print(f"   Goal: Longs UP (+), Shorts DOWN (-)")
            print("-" * 75)
            print(f"   {'TICKER':<10} | {'POSITION':<8} | {'WEIGHT':<8} | {'PRICE MOVE':<12} | {'RESULT'}")
            print("-" * 75)
            
            # Sort by absolute weight to see biggest bets first
            sorted_w = w_check.abs().sort_values(ascending=False)
            
            for t in sorted_w.index:
                w = w_check[t]
                if t in asset_rets:
                    perf = asset_rets[t]
                    
                    if w > 0: # LONG POSITION
                        pos_type = "LONG"
                        status = "✅ PROFIT" if perf > 0 else "❌ LOSS"
                    else: # SHORT POSITION
                        pos_type = "SHORT"
                        status = "✅ PROFIT" if perf < 0 else "❌ LOSS"
                    
                    # We print abs(w) for cleaner reading, pos_type tells direction
                    print(f"   {t:<10} | {pos_type:<8} | {abs(w):.2%}   | {perf:+.2%}      | {status}")
            print("-" * 75)

else:
    print("⚠️ Price history too short to run verification.")
================================================================================
                 REALITY CHECK: RECENT PERFORMANCE OF ML PICKS                  
================================================================================
⚙️ VERIFICATION WINDOW:
   • Checking trend from: 2025-12-24
   • To latest close:     2026-01-21
   • Goal: Did the 'Survivors' actually outperform?

------------------------------------------------------------
STRATEGY                       | RETURN (21d) | ALPHA
------------------------------------------------------------
Strategy 1 (Smart Long)        | +15.46%      | ++17.31%
Strategy 2 (Smart Hedge)       | +10.96%      | ++12.81%
------------------------------------------------------------
Benchmark (S&P 500)            | -1.85%      | 0.00%
------------------------------------------------------------
No description has been provided for this image
>> BREAKDOWN: STRATEGY 1 (LONG ONLY)
   Goal: Price should go UP (+)
---------------------------------------------------------------------------
   TICKER     | WEIGHT     | PRICE MOVE   | RESULT
---------------------------------------------------------------------------
   DG         | 29.03%      | +9.49%      | ✅ PROFIT
   NEM        | 21.45%      | +14.37%      | ✅ PROFIT
   HII        | 14.37%      | +17.75%      | ✅ PROFIT
   STX        | 12.78%      | +15.09%      | ✅ PROFIT
   INTC       | 7.80%      | +35.25%      | ✅ PROFIT
   APH        | 7.24%      | +11.22%      | ✅ PROFIT
   FIX        | 5.71%      | +19.28%      | ✅ PROFIT
   TPR        | 0.92%      | -0.09%      | ❌ LOSS
   LRCX       | 0.70%      | +26.31%      | ✅ PROFIT
   KLAC       | 0.00%      | +17.21%      | ✅ PROFIT
   HWM        | 0.00%      | +4.60%      | ✅ PROFIT
   AMD        | 0.00%      | +8.62%      | ✅ PROFIT
   ALB        | 0.00%      | +17.01%      | ✅ PROFIT
   DLTR       | 0.00%      | +10.66%      | ✅ PROFIT
   CAT        | 0.00%      | +8.77%      | ✅ PROFIT
---------------------------------------------------------------------------

>> BREAKDOWN: STRATEGY 2 (HEDGED)
   Goal: Longs UP (+), Shorts DOWN (-)
---------------------------------------------------------------------------
   TICKER     | POSITION | WEIGHT   | PRICE MOVE   | RESULT
---------------------------------------------------------------------------
   FDS        | SHORT    | 23.30%   | -3.82%      | ✅ PROFIT
   DVA        | SHORT    | 20.22%   | -8.51%      | ✅ PROFIT
   DG         | LONG     | 14.51%   | +9.49%      | ✅ PROFIT
   NEM        | LONG     | 10.72%   | +14.37%      | ✅ PROFIT
   HII        | LONG     | 7.19%   | +17.75%      | ✅ PROFIT
   STX        | LONG     | 6.39%   | +15.09%      | ✅ PROFIT
   GIS        | SHORT    | 5.91%   | -3.41%      | ✅ PROFIT
   INTC       | LONG     | 3.90%   | +35.25%      | ✅ PROFIT
   APH        | LONG     | 3.62%   | +11.22%      | ✅ PROFIT
   FIX        | LONG     | 2.85%   | +19.28%      | ✅ PROFIT
   PYPL       | SHORT    | 0.56%   | -7.61%      | ✅ PROFIT
   TPR        | LONG     | 0.46%   | -0.09%      | ❌ LOSS
   LRCX       | LONG     | 0.35%   | +26.31%      | ✅ PROFIT
   KLAC       | LONG     | 0.00%   | +17.21%      | ✅ PROFIT
   XYZ        | SHORT    | 0.00%   | -4.50%      | ✅ PROFIT
   HWM        | LONG     | 0.00%   | +4.60%      | ✅ PROFIT
   ALB        | SHORT    | 0.00%   | +17.01%      | ❌ LOSS
   CAT        | SHORT    | 0.00%   | +8.77%      | ❌ LOSS
   AMD        | SHORT    | 0.00%   | +8.62%      | ❌ LOSS
   DLTR       | SHORT    | 0.00%   | +10.66%      | ❌ LOSS
   SWKS       | SHORT    | 0.00%   | -10.37%      | ✅ PROFIT
---------------------------------------------------------------------------

ML filtered stock portfolio - LONG ONLY STRATEGY¶

# PARAMETERS
# We use 252 days for Momentum (Standard) and 126 days (6 months) for Risk calculation
LOOKBACK_MOMENTUM = 252   
LOOKBACK_RISK     = 126   
MAX_WEIGHT_PER_ASSET = 0.20  # Diversification Rule: Max 20% in one stock
RISK_FREE_RATE    = last_rf     # last Annual Risk-Free Rate
In [22]:
# --- BLOCK 19: END-TO-END PORTFOLIO BUILDER (MOMENTUM + MAX SHARPE + DIVERSIFICATION) ---
print("\n" + "="*80)
print(f"{'LONG ONLY PORTFOLIO: FROM SURVIVORS TO OPTIMIZED DIVERSIFIED PORTFOLIO':^80}")
print("="*80)

# --- 1. SETTINGS & DATA SETUP ---
if 'prices_stocks' not in locals() or prices_stocks.empty:
    print("❌ Error: 'prices_stocks' data not found.")
else:
    analysis_date = prices_stocks.index[-1]
    
    # PARAMETERS
    # We use 252 days for Momentum (Standard) and 126 days (6 months) for Risk calculation
    LOOKBACK_MOMENTUM = 252   
    LOOKBACK_RISK     = 126   
    MAX_WEIGHT_PER_ASSET = 0.20  # Diversification Rule: Max 20% in one stock
    RISK_FREE_RATE    = last_rf     # last Annual Risk-Free Rate

    print(f"⚙️ BUILD PARAMETERS:")
    print(f"   • Optimization Date: {analysis_date.date()}")
    print(f"   • Diversification:   Max {MAX_WEIGHT_PER_ASSET:.0%} per asset (Hard Cap)")
    print(f"   • Objective:         Maximize Sharpe Ratio (Smart Risk-Adjusted Return)")

    # --- 2. STEP A: SELECTION (RETRIEVE THE SURVIVORS) ---
    # We prioritize the list from Block 17 (ML-Verified).
    # If missing, we fallback to a raw Momentum scan.
    
    candidates = []
    source = "Unknown"

    if 'survivor_longs' in locals() and survivor_longs:
        candidates = survivor_longs
        source = "ML-Verified Survivors (Block 17)"
        print(f"\n📥 LOADED CANDIDATES FROM PREVIOUS STEP:")
        print(f"   • Source: {source}")
        print(f"   • Count:  {len(candidates)} Tickers")
        
    elif 'final_longs' in locals() and final_longs:
        candidates = final_longs
        source = "Regime Filter Survivors (Block 4)"
        print(f"\n📥 LOADED CANDIDATES FROM BLOCK 4:")
        print(f"   • Source: {source}")
        print(f"   • Count:  {len(candidates)} Tickers")
        
    else:
        # Fallback: Fresh Scan
        print(f"\n⚠️ No prior lists found. Running fresh Momentum Scan (Top 15)...")
        hist = prices_stocks.iloc[-LOOKBACK_MOMENTUM:]
        mom = (hist.iloc[-1] / hist.iloc[0]) - 1
        candidates = mom.sort_values(ascending=False).head(15).index.tolist()
        source = "Raw Momentum Scan"

    # --- 3. STEP B: OPTIMIZATION MATH (THE "BRAIN") ---
    if not candidates:
        print("❌ No candidates available to build portfolio.")
    else:
        print(f"\n... calculating covariance matrix for {len(candidates)} assets ...")

        # Get Data for Covariance (Recent 6 Months for stability)
        p_opt = prices_stocks[candidates].iloc[-LOOKBACK_RISK:]
        r_opt = p_opt.pct_change().fillna(0) # Simple returns for optimization
        
        # Annualized Inputs
        mu = r_opt.mean() * 252
        cov = r_opt.cov() * 252
        num_assets = len(candidates)

        # Define the Objective Function (Negative Sharpe)
        def neg_sharpe(w):
            ret = np.sum(w * mu)
            vol = np.sqrt(np.dot(w.T, np.dot(cov, w)))
            if vol == 0: return 0
            return -(ret - RISK_FREE_RATE) / vol

        # Constraints
        # 1. Sum of weights must equal 1 (100% Invested)
        constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})

        # 2. Bounds (Diversification Rule)
        # Each asset must be between 0% and MAX_WEIGHT
        bounds = tuple((0.0, MAX_WEIGHT_PER_ASSET) for _ in range(num_assets))

        # Initial Guess (Equal Weight)
        init_guess = [1. / num_assets] * num_assets

        print("... solving for Max Sharpe Ratio ...")
        
        try:
            result = sco.minimize(neg_sharpe, init_guess, method='SLSQP', bounds=bounds, constraints=constraints)
            
            if result.success:
                w_final = pd.Series(result.x, index=candidates)
                
                # Filter "Dust" (Weights < 0.1% are essentially zero)
                w_final = w_final[w_final > 0.001].sort_values(ascending=False)
                
                # Calculate Final Portfolio Stats
                final_ret = np.sum(w_final * mu[w_final.index])
                final_vol = np.sqrt(np.dot(w_final.T, np.dot(cov.loc[w_final.index, w_final.index], w_final)))
                final_sharpe = (final_ret - RISK_FREE_RATE) / final_vol
                
                # --- 4. STEP C: FINAL OUTPUT REPORT ---
                print("\n" + "="*80)
                print(f"{'🏆 FINAL OPTIMIZED PORTFOLIO 🏆':^80}")
                print("="*80)
                
                print(f"PORTFOLIO METRICS (ANNUALIZED PREDICTION):")
                print(f"   • Expected Return: {final_ret:.2%}")
                print(f"   • Risk (Vol):      {final_vol:.2%}")
                print(f"   • Sharpe Ratio:    {final_sharpe:.2f}")
                print("-" * 80)
                
                print(f"{'TICKER':<10} | {'WEIGHT':<10} | {'SHARES (Approx £10k)'}")
                print("-" * 80)
                
                # Assuming £10,000 capital for "Shares" example
                capital = 10000 
                current_prices = prices_stocks.iloc[-1]
                
                for t, w in w_final.items():
                    # Calculate approx shares (Rounded to 2 decimals)
                    price = current_prices.get(t, 100)
                    amt = capital * w
                    shares = round(amt / price, 2) if price > 0 else 0
                    
                    print(f"{t:<10} | {w:.2%}     | {shares} shares (£{amt:.2f})")
                    
                print("-" * 80)
                print(f"TOTAL ALLOCATION: {w_final.sum():.2%}")
                
                # Visualization
                plt.figure(figsize=(12, 6))
                
                # Bar Chart
                colors = ['#00c853' if w >= MAX_WEIGHT_PER_ASSET - 0.01 else '#2962ff' for w in w_final.values]
                bars = plt.bar(w_final.index, w_final.values, color=colors, alpha=0.8, edgecolor='black')
                
                # Max Cap Line
                plt.axhline(MAX_WEIGHT_PER_ASSET, color='red', linestyle='--', linewidth=2, label=f'Safety Cap ({MAX_WEIGHT_PER_ASSET:.0%})')
                
                plt.title("Final Portfolio Allocation (Optimized & Capped)", fontsize=14, fontweight='bold')
                plt.ylabel("Weight in Portfolio")
                plt.legend()
                plt.grid(axis='y', alpha=0.3)
                
                # Annotate bars
                for bar in bars:
                    height = bar.get_height()
                    plt.text(bar.get_x() + bar.get_width()/2., height + 0.005,
                             f'{height:.1%}', ha='center', va='bottom', fontsize=9, fontweight='bold')
                
                plt.tight_layout()
                plt.show()
                
                # Save to global variable for later use in Audit (Block 25)
                w_smart_lo = w_final
                
            else:
                print("❌ Optimization Failed: Could not find a solution that satisfies constraints.")

        except Exception as e:
            print(f"❌ Optimization Error: {e}")
================================================================================
     LONG ONLY PORTFOLIO: FROM SURVIVORS TO OPTIMIZED DIVERSIFIED PORTFOLIO     
================================================================================
⚙️ BUILD PARAMETERS:
   • Optimization Date: 2026-01-21
   • Diversification:   Max 20% per asset (Hard Cap)
   • Objective:         Maximize Sharpe Ratio (Smart Risk-Adjusted Return)

📥 LOADED CANDIDATES FROM PREVIOUS STEP:
   • Source: ML-Verified Survivors (Block 17)
   • Count:  32 Tickers

... calculating covariance matrix for 32 assets ...
... solving for Max Sharpe Ratio ...

================================================================================
                         🏆 FINAL OPTIMIZED PORTFOLIO 🏆                          
================================================================================
PORTFOLIO METRICS (ANNUALIZED PREDICTION):
   • Expected Return: 93.90%
   • Risk (Vol):      13.11%
   • Sharpe Ratio:    7.16
--------------------------------------------------------------------------------
TICKER     | WEIGHT     | SHARES (Approx £10k)
--------------------------------------------------------------------------------
MNST       | 20.00%     | 32.94 shares (£2000.00)
CVS        | 16.70%     | 27.97 shares (£1669.57)
HII        | 13.20%     | 4.26 shares (£1320.27)
DG         | 10.02%     | 9.17 shares (£1002.08)
GOOGL      | 8.88%     | 3.7 shares (£888.13)
INTC       | 7.23%     | 19.97 shares (£722.75)
NEM        | 6.49%     | 7.32 shares (£648.92)
CAH        | 4.93%     | 3.15 shares (£493.28)
ALB        | 4.00%     | 3.11 shares (£399.73)
STX        | 2.68%     | 1.1 shares (£268.08)
CMI        | 2.34%     | 0.55 shares (£234.01)
WELL       | 1.20%     | 0.86 shares (£119.82)
RTX        | 0.86%     | 0.59 shares (£85.72)
LRCX       | 0.78%     | 0.47 shares (£77.53)
INCY       | 0.70%     | 0.92 shares (£70.11)
--------------------------------------------------------------------------------
TOTAL ALLOCATION: 100.00%
No description has been provided for this image

Next block performs a "Deep Dive Risk Audit" on the portfolio just built.

It simulates holding the optimized portfolio over the last 12 months to calculate professional-grade risk metrics. This tells you why your portfolio made money:

Beta: Did it just ride the market wave?

Alpha: Did it actually beat the market through skill?

Treynor Ratio: Was the return worth the specific risk taken?

In [23]:
# --- BLOCK 20: 1-YEAR HISTORICAL AUDIT (ADVANCED METRICS) ---
print("\n" + "="*80)
print(f"{'HISTORICAL AUDIT: DEEP DIVE RISK METRICS':^80}")
print("="*80)

# 1. SETUP: DYNAMIC DATES (LAST 1 YEAR)
if 'prices_stocks' in locals() and not prices_stocks.empty:
    end_date = prices_stocks.index[-1]
    start_date = end_date - pd.DateOffset(years=1)
    
    if start_date < prices_stocks.index[0]:
        start_date = prices_stocks.index[0]
else:
    print("⚠️ Error: Price data not found. Please run Block 1 first.")
    end_date = pd.Timestamp.now()
    start_date = end_date - pd.DateOffset(years=1)

print(f"📅 AUDIT PERIOD: {start_date.date()} -> {end_date.date()}")

# 2. RETRIEVE OPTIMIZED WEIGHTS
if 'w_smart_lo' in locals() and w_smart_lo is not None:
    weights = w_smart_lo
    print(f"   • Portfolio: Optimized Diversified Model ({len(weights)} Assets)")
else:
    print("⚠️ Optimized weights not found. Run Block 19 first.")
    # Fallback for testing: Equal weight on first 5 stocks
    weights = pd.Series([0.2]*5, index=prices_stocks.columns[:5]) 

# 3. CALCULATE DATA
# Slice data
hist_prices = prices_stocks.loc[start_date:end_date]
hist_rets = hist_prices.pct_change().fillna(0)

# Portfolio Returns
valid_assets = [t for t in weights.index if t in hist_rets.columns]
aligned_w = weights[valid_assets].reindex(hist_rets.columns).fillna(0)
port_daily = hist_rets.dot(aligned_w)
equity_curve = 10000 * (1 + port_daily).cumprod()

# Benchmark Returns (S&P 500)
try:
    spy_data = yf.download("SPY", start=start_date, end=end_date, progress=False, auto_adjust=True)
    
    # Robust handling for yfinance MultiIndex columns
    if isinstance(spy_data.columns, pd.MultiIndex):
        spy_close = spy_data.xs('Close', axis=1, level=0).iloc[:, 0]
    elif 'Close' in spy_data.columns:
        spy_close = spy_data['Close']
    else:
        spy_close = spy_data.iloc[:, 0]
    
    spy_close = spy_close.reindex(hist_prices.index).ffill()
    spy_daily = spy_close.pct_change().fillna(0)
    spy_curve = 10000 * (1 + spy_daily).cumprod()
    has_spy = True
except Exception as e:
    print(f"⚠️ Benchmark Warning: {e}")
    has_spy = False
    spy_daily = pd.Series(0, index=hist_prices.index)
    spy_curve = pd.Series(10000, index=hist_prices.index)

# 4. CALCULATE METRICS
# Risk Free Rate (4%)
rf = last_rf 

# A. Basic Stats
port_total_ret = (equity_curve.iloc[-1] / 10000) - 1
spy_total_ret = (spy_curve.iloc[-1] / 10000) - 1

port_vol = port_daily.std() * np.sqrt(252)
spy_vol = spy_daily.std() * np.sqrt(252)

# Max Drawdown
port_dd = (equity_curve / equity_curve.cummax()) - 1
port_max_dd = port_dd.min()
spy_dd = (spy_curve / spy_curve.cummax()) - 1
spy_max_dd = spy_dd.min()

# B. Advanced Stats (Beta, Alpha, Treynor)
if has_spy:
    # Covariance & Beta
    covariance = port_daily.cov(spy_daily)
    variance = spy_daily.var()
    beta = covariance / variance
    
    # Correlation
    correlation = port_daily.corr(spy_daily)
    
    # Jensen's Alpha (Annualized)
    # Alpha = Return - (Rf + Beta * (Market_Return - Rf))
    alpha_jensen = port_total_ret - (rf + beta * (spy_total_ret - rf))
    
    # Treynor Measure
    # Treynor = (Return - Rf) / Beta
    if abs(beta) > 0.01:
        treynor = (port_total_ret - rf) / beta
    else:
        treynor = 0.0
        
    # Sharpe Ratio
    sharpe = (port_total_ret - rf) / port_vol if port_vol > 0 else 0
    spy_sharpe = (spy_total_ret - rf) / spy_vol if spy_vol > 0 else 0
else:
    beta, correlation, alpha_jensen, treynor, sharpe = 0, 0, 0, 0, 0
    spy_sharpe = 0

# 5. REPORT TABLE
print("\n" + "="*80)
print(f"{'📊 ADVANCED PERFORMANCE REPORT (1 YEAR BACKTEST) 📊':^80}")
print("="*80)
print(f"{'METRIC':<25} | {'PORTFOLIO':<15} | {'BENCHMARK (SPY)'}")
print("-" * 80)
# Basics
print(f"{'Total Return':<25} | {port_total_ret:+.2%}        | {spy_total_ret:+.2%}")
print(f"{'Annual Volatility':<25} | {port_vol:.2%}          | {spy_vol:.2%}")
print(f"{'Max Drawdown':<25} | {port_max_dd:.2%}          | {spy_max_dd:.2%}")
print("-" * 80)
# Risk-Adjusted
print(f"{'Sharpe Ratio':<25} | {sharpe:.2f}            | {spy_sharpe:.2f}")
print(f"{'Treynor Measure':<25} | {treynor:.2f}            | --")
print(f"{'Jensen\'s Alpha':<25} | {alpha_jensen:+.2%}        | --")
print("-" * 80)
# Market Relationship
print(f"{'Beta':<25} | {beta:.2f}            | 1.00")
print(f"{'Correlation':<25} | {correlation:.2f}            | 1.00")
print("=" * 80)

# 6. EXPLANATION OF NEW METRICS
print("\n📝 METRIC DECODER:")

if beta < 1.0:
    print(f"🛡️ BETA ({beta:.2f}): Your portfolio is LESS volatile than the S&P 500.")
else:
    print(f"🔥 BETA ({beta:.2f}): Your portfolio is MORE volatile than the S&P 500.")

print(f"🔗 CORRELATION ({correlation:.2f}): 1.0 means you move exactly with the market.")
print(f"   (Lower is better for diversification/hedging).")

# 7. VISUALIZATION
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True, gridspec_kw={'height_ratios': [3, 1]})

# Top Chart: Growth
ax1.plot(equity_curve, color='#00c853', linewidth=2, label='Optimized Portfolio')
if has_spy:
    ax1.plot(spy_curve, color='gray', linestyle='--', label='S&P 500', alpha=0.7)
ax1.set_title(f"Growth of £10,000 (Last 12 Months)", fontsize=12, fontweight='bold')
ax1.set_ylabel("Portfolio Value (£)")
ax1.legend(loc="upper left")
ax1.grid(True, alpha=0.3)

# Bottom Chart: Drawdown
ax2.fill_between(port_dd.index, port_dd, 0, color='red', alpha=0.3, label='Portfolio Drawdown')
if has_spy:
    ax2.plot(spy_dd, color='gray', linestyle='--', linewidth=1, alpha=0.5, label='SPY Drawdown')

ax2.set_title(f"Risk Profile (Max Drawdown: {port_max_dd:.2%})", fontsize=10, fontweight='bold')
ax2.set_ylabel("Drop from Peak")
ax2.grid(True, alpha=0.3)
ax2.set_xlabel("Date")

plt.tight_layout()
plt.show()
================================================================================
                    HISTORICAL AUDIT: DEEP DIVE RISK METRICS                    
================================================================================
📅 AUDIT PERIOD: 2025-01-21 -> 2026-01-21
   • Portfolio: Optimized Diversified Model (15 Assets)

================================================================================
               📊 ADVANCED PERFORMANCE REPORT (1 YEAR BACKTEST) 📊                
================================================================================
METRIC                    | PORTFOLIO       | BENCHMARK (SPY)
--------------------------------------------------------------------------------
Total Return              | +81.58%        | +13.68%
Annual Volatility         | 17.56%          | 18.92%
Max Drawdown              | -8.72%          | -18.76%
--------------------------------------------------------------------------------
Sharpe Ratio              | 4.64            | 0.72
Treynor Measure           | 1.39            | --
Jensen's Alpha            | +73.55%        | --
--------------------------------------------------------------------------------
Beta                      | 0.59            | 1.00
Correlation               | 0.63            | 1.00
================================================================================

📝 METRIC DECODER:
🛡️ BETA (0.59): Your portfolio is LESS volatile than the S&P 500.
🔗 CORRELATION (0.63): 1.0 means you move exactly with the market.
   (Lower is better for diversification/hedging).
No description has been provided for this image

Let's assume we bought this portfolio of stocks 4 trading weeks ago - THIS WILL HAVE A LOOK-AHEAD BIAS, HOWEVER WE CAN UNDERSTAND THE CORRELATION OF THIS PORTFOLIO

LONG-SHORT HEDGED 0 cost portfolio¶

In [24]:
# --- BLOCK 21: FINAL PRODUCTION EXECUTION (LONG-SHORT HEDGED - zero cost portfolio) ---
print("\n" + "="*80)
print(f"{'🚀 FINAL PRODUCTION RUN: LONG-SHORT HEDGED ORDERS 🚀':^80}")
print("="*80)

# 1. SETUP: DEFINING "NOW"
if 'prices_stocks' in locals() and not prices_stocks.empty:
    analysis_date = prices_stocks.index[-1]
else:
    print("⚠️ Error: 'prices_stocks' data not found.")
    analysis_date = pd.Timestamp.now()

print(f"⚙️ EXECUTION PARAMETERS:")
print(f"   • Analysis Date:       {analysis_date.date()}")
print(f"   • Strategy Type:       Market Neutral (Long Winners / Short Losers)")
print(f"   • Optimization Target: Maximize Sharpe (Dollar Neutral)")
print(f"   • Constraints:         Max 20% per position")

# 2. GENERATE CANDIDATES (Prioritize ML Survivors)
active_longs = []
active_shorts = []
source = "None"

if 'survivor_longs' in locals() and survivor_longs:
    active_longs = survivor_longs
    if 'survivor_shorts' in locals(): active_shorts = survivor_shorts
    source = "ML-Verified Survivors (Block 17)"
elif 'container_longs' in locals():
    active_longs = container_longs
    active_shorts = container_shorts
    source = "Regime Filter Survivors (Block 4)"
else:
    # Emergency Fallback: Fresh Scan
    source = "Emergency Fresh Scan (Top 15 Momentum)"
    mom = (prices_stocks.iloc[-1] / prices_stocks.iloc[-252]) - 1
    active_longs = mom.nlargest(15).index.tolist()
    active_shorts = mom.nsmallest(15).index.tolist()

print(f"\n📥 CANDIDATE SOURCE: {source}")
print(f"   • Available Longs:  {len(active_longs)}")
print(f"   • Available Shorts: {len(active_shorts)}")

if len(active_longs) < 2 or len(active_shorts) < 2:
    print("❌ Not enough candidates to build a hedged portfolio.")
else:
    # 3. OPTIMIZATION ENGINE (MARKET NEUTRAL SPECIFIC)
    def optimize_hedge(longs, shorts):
        tickers = list(set(longs + shorts))
        
        # Get recent data (Last 6 months for Covariance Stability)
        p_sub = prices_stocks[tickers].iloc[-126:] 
        r_sub = p_sub.pct_change().fillna(0)
        
        mu = r_sub.mean() * 252
        cov = r_sub.cov() * 252
        num = len(tickers)
        rf = last_rf
        
        def neg_sharpe(w):
            ret = np.sum(w * mu)
            vol = np.sqrt(np.dot(w.T, np.dot(cov, w)))
            if vol == 0: return 0
            return -(ret - rf)/vol
        
        # CONSTRAINT: Sum of weights = 0 (Dollar Neutral)
        # This means £10k Long and £10k Short cancels out to £0 net exposure
        cons = [{'type': 'eq', 'fun': lambda x: np.sum(x) - 0}]
        
        # BOUNDS: 
        # Longs:  0% to +20%
        # Shorts: -20% to 0%
        bounds = []
        init_guess = []
        
        n_long = len([t for t in tickers if t in longs])
        n_short = len([t for t in tickers if t in shorts])
        
        for t in tickers:
            if t in shorts:
                bounds.append((-0.20, 0.0)) # Short Side
                init_guess.append(-0.5/n_short if n_short > 0 else 0)
            else:
                bounds.append((0.0, 0.20))  # Long Side
                init_guess.append(0.5/n_long if n_long > 0 else 0)
                    
        bounds = tuple(bounds)

        try:
            res = sco.minimize(neg_sharpe, init_guess, method='SLSQP', bounds=bounds, constraints=cons)
            if res.success:
                return pd.Series(res.x, index=tickers)
        except:
            return None
        return None

    print("\n... Calculating Optimal Hedged Positions ...")
    w_final_mn = optimize_hedge(active_longs, active_shorts)

    # 4. PRINT THE "CHEAT SHEET" (TRADE ORDERS)
    if w_final_mn is not None:
        print("\n" + "="*80)
        print(f"{'🛒 OFFICIAL HEDGED ORDERS (VALID FOR 1 MONTH) 🛒':^80}")
        print("="*80)
        print(f"STRATEGY GOAL: Recession Proofing. Profit from Spread (Winners - Losers).")
        print("-" * 80)
        print(f"{'TICKER':<10} | {'ALLOCATION':<12} | {'ACTION':<6} | {'SHARES (£10k)'}")
        print("-" * 80)
        
        capital = 10000
        current_prices = prices_stocks.iloc[-1]
        
        w_clean = w_final_mn[w_final_mn.abs() > 0.001].sort_values(ascending=False)
        
        for t, w in w_clean.items():
            price = current_prices.get(t, 0)
            amt = capital * abs(w) # Absolute value for share count
            shares = round(amt / price, 2) if price > 0 else 0
            
            if w > 0:
                action = "BUY"
                color_arrow = "🟢"
            else:
                action = "SELL" # Short Sell
                color_arrow = "🔴"
            
            print(f"{t:<10} | {abs(w):.2%}      | {action:<6} | {shares} shs {color_arrow}")
        print("-" * 80)

        # 5. VISUALIZATION
        plt.figure(figsize=(12, 6))
        colors = ['#00c853' if w > 0 else '#d50000' for w in w_clean.values]
        bars = plt.bar(w_clean.index, w_clean.values, color=colors, alpha=0.8, edgecolor='black')
        
        plt.axhline(0, color='black', linewidth=1)
        plt.title("Long-Short Portfolio Structure", fontsize=14, fontweight='bold')
        plt.ylabel("Weight (Positive = Long, Negative = Short)")
        plt.grid(axis='y', alpha=0.3)
        
        # Annotate
        for bar in bars:
            height = bar.get_height()
            offset = 0.005 if height > 0 else -0.015
            plt.text(bar.get_x() + bar.get_width()/2., height + offset,
                     f'{height:.1%}', ha='center', va='bottom', fontsize=8, fontweight='bold')

        plt.tight_layout()
        plt.show()

        # Save Globally
        w_smart_mn = w_clean
       # print("\n✅ Portfolio saved to variable 'w_smart_mn'. Ready for execution.")
        
    else:
        print("❌ Optimization Failed. Could not balance Longs and Shorts perfectly.")
================================================================================
               🚀 FINAL PRODUCTION RUN: LONG-SHORT HEDGED ORDERS 🚀               
================================================================================
⚙️ EXECUTION PARAMETERS:
   • Analysis Date:       2026-01-21
   • Strategy Type:       Market Neutral (Long Winners / Short Losers)
   • Optimization Target: Maximize Sharpe (Dollar Neutral)
   • Constraints:         Max 20% per position

📥 CANDIDATE SOURCE: ML-Verified Survivors (Block 17)
   • Available Longs:  32
   • Available Shorts: 6

... Calculating Optimal Hedged Positions ...

================================================================================
                 🛒 OFFICIAL HEDGED ORDERS (VALID FOR 1 MONTH) 🛒                 
================================================================================
STRATEGY GOAL: Recession Proofing. Profit from Spread (Winners - Losers).
--------------------------------------------------------------------------------
TICKER     | ALLOCATION   | ACTION | SHARES (£10k)
--------------------------------------------------------------------------------
IVZ        | 16.30%      | BUY    | 78.39 shs 🟢
MNST       | 10.05%      | BUY    | 16.55 shs 🟢
CVS        | 9.29%      | BUY    | 15.57 shs 🟢
HII        | 8.35%      | BUY    | 2.7 shs 🟢
INTC       | 4.85%      | BUY    | 13.41 shs 🟢
LRCX       | 4.39%      | BUY    | 2.65 shs 🟢
ALB        | 3.22%      | BUY    | 2.5 shs 🟢
EL         | 3.04%      | BUY    | 3.56 shs 🟢
HAS        | 2.96%      | BUY    | 4.67 shs 🟢
IDXX       | 2.62%      | BUY    | 0.51 shs 🟢
DG         | 2.04%      | BUY    | 1.87 shs 🟢
NEM        | 0.65%      | BUY    | 0.73 shs 🟢
INCY       | 0.11%      | BUY    | 0.14 shs 🟢
GIS        | 7.35%      | SELL   | 22.2 shs 🔴
FDS        | 8.34%      | SELL   | 4.04 shs 🔴
PYPL       | 9.35%      | SELL   | 22.77 shs 🔴
XYZ        | 9.93%      | SELL   | 21.26 shs 🔴
SWKS       | 12.91%      | SELL   | 30.19 shs 🔴
DVA        | 20.00%      | SELL   | 25.82 shs 🔴
--------------------------------------------------------------------------------
No description has been provided for this image

LET'S LOOK HOW THIS PORTFOLIO HAS PERFORMED LAST MONTH THEN.

1-YEAR HISTORICAL AUDIT (MARKET NEUTRAL EDITION)¶

In [25]:
# --- BLOCK 22: 1-YEAR HISTORICAL AUDIT (MARKET NEUTRAL EDITION) ---
print("\n" + "="*80)
print(f"{'LONG-SHORT PORTFOLIO: HEDGE FUND RISK METRICS':^80}")
print("="*80)

# 1. SETUP: DYNAMIC DATES (LAST 1 YEAR)
if 'prices_stocks' in locals() and not prices_stocks.empty:
    end_date = prices_stocks.index[-1]
    start_date = end_date - pd.DateOffset(years=1)
    if start_date < prices_stocks.index[0]:
        start_date = prices_stocks.index[0]
else:
    print("⚠️ Error: Price data not found.")
    end_date = pd.Timestamp.now()
    start_date = end_date - pd.DateOffset(years=1)

print(f"📅 AUDIT PERIOD: {start_date.date()} -> {end_date.date()}")

# 2. RETRIEVE HEDGED WEIGHTS
if 'w_smart_mn' in locals() and w_smart_mn is not None:
    weights = w_smart_mn
    print(f"   • Portfolio: Long-Short Market Neutral ({len(weights)} Positions)")
else:
    print("⚠️ Hedged weights not found. Run Block 21 (Hedged) first.")
    # Fallback: Dummy Hedge
    weights = pd.Series([0.1, -0.1], index=prices_stocks.columns[:2]) 

# 3. CALCULATE DATA
hist_prices = prices_stocks.loc[start_date:end_date]
hist_rets = hist_prices.pct_change().fillna(0)

# Portfolio Returns (Spread + Cash Interest)
# Note: Since weights sum to 0, we add the Risk Free Rate to simulate interest on capital
valid_assets = [t for t in weights.index if t in hist_rets.columns]
aligned_w = weights[valid_assets].reindex(hist_rets.columns).fillna(0)

# Pure Alpha Return (The Spread)
spread_daily = hist_rets.dot(aligned_w)

# Total Return (Spread + Risk Free Rate)
rf_daily = last_rf / 252
port_daily = spread_daily + rf_daily
equity_curve = 10000 * (1 + port_daily).cumprod()

# Benchmark Returns (S&P 500)
try:
    spy_data = yf.download("SPY", start=start_date, end=end_date, progress=False, auto_adjust=True)
    if isinstance(spy_data.columns, pd.MultiIndex):
        spy_close = spy_data.xs('Close', axis=1, level=0).iloc[:, 0]
    elif 'Close' in spy_data.columns:
        spy_close = spy_data['Close']
    else:
        spy_close = spy_data.iloc[:, 0]
    
    spy_close = spy_close.reindex(hist_prices.index).ffill()
    spy_daily = spy_close.pct_change().fillna(0)
    spy_curve = 10000 * (1 + spy_daily).cumprod()
    has_spy = True
except Exception as e:
    print(f"⚠️ Benchmark Warning: {e}")
    has_spy = False
    spy_daily = pd.Series(0, index=hist_prices.index)
    spy_curve = pd.Series(10000, index=hist_prices.index)

# 4. CALCULATE METRICS
rf = last_rf

# A. Basic Stats
port_total_ret = (equity_curve.iloc[-1] / 10000) - 1
spy_total_ret = (spy_curve.iloc[-1] / 10000) - 1

port_vol = port_daily.std() * np.sqrt(252)
spy_vol = spy_daily.std() * np.sqrt(252)

# Max Drawdown
port_dd = (equity_curve / equity_curve.cummax()) - 1
port_max_dd = port_dd.min()
spy_dd = (spy_curve / spy_curve.cummax()) - 1
spy_max_dd = spy_dd.min()

# B. Advanced Stats
if has_spy:
    covariance = port_daily.cov(spy_daily)
    variance = spy_daily.var()
    beta = covariance / variance
    correlation = port_daily.corr(spy_daily)
    alpha_jensen = port_total_ret - (rf + beta * (spy_total_ret - rf))
    
    if abs(beta) > 0.01:
        treynor = (port_total_ret - rf) / beta
    else:
        # For Market Neutral, Treynor blows up if Beta is 0. 
        # We use simple Sharpe instead.
        treynor = 0.0 
        
    sharpe = (port_total_ret - rf) / port_vol if port_vol > 0 else 0
    spy_sharpe = (spy_total_ret - rf) / spy_vol if spy_vol > 0 else 0
else:
    beta, correlation, alpha_jensen, treynor, sharpe = 0, 0, 0, 0, 0
    spy_sharpe = 0

# 5. REPORT TABLE
print("\n" + "="*80)
print(f"{'📊 HEDGE FUND PERFORMANCE REPORT (1 YEAR) 📊':^80}")
print("="*80)
print(f"{'METRIC':<25} | {'HEDGED PORTFOLIO':<18} | {'S&P 500 (BENCHMARK)'}")
print("-" * 80)
# Basics
print(f"{'Total Return':<25} | {port_total_ret:+.2%}           | {spy_total_ret:+.2%}")
print(f"{'Annual Volatility':<25} | {port_vol:.2%}             | {spy_vol:.2%}")
print(f"{'Max Drawdown':<25} | {port_max_dd:.2%}             | {spy_max_dd:.2%}")
print("-" * 80)
# Risk-Adjusted
print(f"{'Sharpe Ratio':<25} | {sharpe:.2f}               | {spy_sharpe:.2f}")
print(f"{'Jensen\'s Alpha':<25} | {alpha_jensen:+.2%}           | --")
print("-" * 80)
# Market Relationship
print(f"{'Beta':<25} | {beta:.2f}               | 1.00")
print(f"{'Correlation':<25} | {correlation:.2f}               | 1.00")
print("=" * 80)

# 6. EXPLANATION (ADAPTED FOR HEDGING)
print("\n📝 METRIC DECODER (MARKET NEUTRAL EDITION):")
if abs(beta) < 0.3:
    print(f"✅ BETA ({beta:.2f}): SUCCESS. Your portfolio is 'Uncorrelated'.")
    print("   It ignores market crashes and moves based on your stock picks.")
else:
    print(f"⚠️ BETA ({beta:.2f}): WARNING. Your hedge is leaking.")
    print("   You are still too correlated with the general market.")

# 7. VISUALIZATION
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True, gridspec_kw={'height_ratios': [3, 1]})

# Top Chart: Growth
ax1.plot(equity_curve, color='#6200ea', linewidth=2, label='Long-Short Hedge')
if has_spy:
    ax1.plot(spy_curve, color='gray', linestyle='--', label='S&P 500', alpha=0.5)
ax1.set_title(f"Hedged Growth vs Market (Last 12 Months)", fontsize=12, fontweight='bold')
ax1.set_ylabel("Portfolio Value (£)")
ax1.legend(loc="upper left")
ax1.grid(True, alpha=0.3)

# Bottom Chart: Drawdown
ax2.fill_between(port_dd.index, port_dd, 0, color='#d50000', alpha=0.3, label='Hedge Drawdown')
if has_spy:
    ax2.plot(spy_dd, color='gray', linestyle='--', linewidth=1, alpha=0.5, label='SPY Drawdown')

ax2.set_title(f"Risk Profile (Max Drawdown: {port_max_dd:.2%})", fontsize=10, fontweight='bold')
ax2.set_ylabel("Drop from Peak")
ax2.grid(True, alpha=0.3)
ax2.set_xlabel("Date")

plt.tight_layout()
plt.show()
================================================================================
                 LONG-SHORT PORTFOLIO: HEDGE FUND RISK METRICS                  
================================================================================
📅 AUDIT PERIOD: 2025-01-21 -> 2026-01-21
   • Portfolio: Long-Short Market Neutral (19 Positions)

================================================================================
                   📊 HEDGE FUND PERFORMANCE REPORT (1 YEAR) 📊                   
================================================================================
METRIC                    | HEDGED PORTFOLIO   | S&P 500 (BENCHMARK)
--------------------------------------------------------------------------------
Total Return              | +100.09%           | +13.68%
Annual Volatility         | 10.98%             | 18.92%
Max Drawdown              | -3.41%             | -18.76%
--------------------------------------------------------------------------------
Sharpe Ratio              | 9.12               | 0.72
Jensen's Alpha            | +99.69%           | --
--------------------------------------------------------------------------------
Beta                      | 0.03               | 1.00
Correlation               | 0.05               | 1.00
================================================================================

📝 METRIC DECODER (MARKET NEUTRAL EDITION):
✅ BETA (0.03): SUCCESS. Your portfolio is 'Uncorrelated'.
   It ignores market crashes and moves based on your stock picks.
No description has been provided for this image

In a normal "Buy and Hold" portfolio, return comes from the sensitivity (beta) of the portfolio to overall market direction.

But now I'll forced the portfolio's Beta to be 0.00. The alghoritm will use weights and the security's betas to achieve a portfolio that is uncorrelated with the overall market.

Therefore, the only way for the porfolio to make money is having the right stock picks, no matter the direction of the market.

In finance, return that is generated purely from skill/stock-picking (independent of the market) is defined as Alpha.

BETA-NEUTRAL HEDGE portfolio¶

In [26]:
# --- BLOCK 21: FINAL PRODUCTION EXECUTION ( BETA-NEUTRAL HEDGE) ---

print("\n" + "="*80)
print(f"{'🚀 FINAL PRODUCTION RUN: TRUE BETA-NEUTRAL HEDGING 🚀':^80}")
print("="*80)

# 1. SETUP & DATA
if 'prices_stocks' in locals() and not prices_stocks.empty:
    analysis_date = prices_stocks.index[-1]
else:
    print("⚠️ Error: 'prices_stocks' data not found.")
    analysis_date = pd.Timestamp.now()

print(f"⚙️ EXECUTION PARAMETERS:")
print(f"   • Analysis Date:       {analysis_date.date()}")
print(f"   • Strategy Type:       BETA NEUTRAL")
print(f"   • Optimization Target: Maximize Sharpe")
print(f"   • Constraint:          Sum(Weight * Beta) = 0.0")

# 2. GENERATE CANDIDATES (Prioritize ML Survivors)
active_longs = []
active_shorts = []
source = "None"

if 'survivor_longs' in locals() and survivor_longs:
    active_longs = survivor_longs
    if 'survivor_shorts' in locals(): active_shorts = survivor_shorts
    source = "ML-Verified Survivors (Block 17)"
elif 'container_longs' in locals():
    active_longs = container_longs
    active_shorts = container_shorts
    source = "Regime Filter Survivors (Block 4)"
else:
    source = "Emergency Fresh Scan"
    mom = (prices_stocks.iloc[-1] / prices_stocks.iloc[-252]) - 1
    active_longs = mom.nlargest(15).index.tolist()
    active_shorts = mom.nsmallest(15).index.tolist()

print(f"\n📥 CANDIDATE SOURCE: {source}")
print(f"   • Available Longs:  {len(active_longs)}")
print(f"   • Available Shorts: {len(active_shorts)}")

if len(active_longs) < 2 or len(active_shorts) < 2:
    print("❌ Not enough candidates to build a hedged portfolio.")
else:
    # 3. CALCULATE BETAS (CRITICAL STEP)
    print("\n... Calculating Stock Betas vs S&P 500 ...")
    tickers = list(set(active_longs + active_shorts))
    
    # Get Data
    p_sub = prices_stocks[tickers].iloc[-126:] # Last 6 months
    r_sub = p_sub.pct_change().fillna(0)
    
    # Get Benchmark (SPY)
    try:
        spy = yf.download("SPY", start=p_sub.index[0], end=p_sub.index[-1], progress=False, auto_adjust=True)
        if isinstance(spy.columns, pd.MultiIndex): spy = spy.xs('Close', axis=1, level=0).iloc[:, 0]
        else: spy = spy['Close'] if 'Close' in spy.columns else spy.iloc[:, 0]
        
        spy_ret = spy.reindex(r_sub.index).ffill().pct_change().fillna(0)
        
        # Calculate Beta for each stock
        # Beta = Cov(Stock, Market) / Var(Market)
        market_var = spy_ret.var()
        asset_betas = {}
        
        for t in tickers:
            cov = r_sub[t].cov(spy_ret)
            beta = cov / market_var if market_var > 0 else 1.0
            asset_betas[t] = beta
            
        s_betas = pd.Series(asset_betas)
        print(f"   • Beta Range: {s_betas.min():.2f} to {s_betas.max():.2f}")
        
    except Exception as e:
        print(f"⚠️ Warning: Could not calc Betas ({e}). Assuming Beta=1.0 for all (Dollar Neutral fallback).")
        s_betas = pd.Series(1.0, index=tickers)

    # 4. OPTIMIZATION ENGINE (BETA NEUTRAL)
    def optimize_beta_neutral(longs, shorts, betas):
        tickers = list(set(longs + shorts))
        aligned_betas = betas[tickers].values
        
        p_opt = prices_stocks[tickers].iloc[-126:]
        r_opt = p_opt.pct_change().fillna(0)
        
        mu = r_opt.mean() * 252
        cov = r_opt.cov() * 252
        num = len(tickers)
        rf = 0.04
        
        def neg_sharpe(w):
            ret = np.sum(w * mu)
            vol = np.sqrt(np.dot(w.T, np.dot(cov, w)))
            if vol == 0: return 0
            return -(ret - rf)/vol
        
        # CONSTRAINT: Sum of (Weight * Beta) = 0
        # This is the magic formula for Beta Neutrality
        cons = [{'type': 'eq', 'fun': lambda w: np.sum(w * aligned_betas) - 0}]
        
        bounds = []
        init_guess = []
        n_long = len([t for t in tickers if t in longs])
        n_short = len([t for t in tickers if t in shorts])
        
        for t in tickers:
            if t in shorts:
                bounds.append((-0.20, 0.0))
                init_guess.append(-0.5/n_short if n_short>0 else 0)
            else:
                bounds.append((0.0, 0.20))
                init_guess.append(0.5/n_long if n_long>0 else 0)
        
        bounds = tuple(bounds)

        try:
            res = sco.minimize(neg_sharpe, init_guess, method='SLSQP', bounds=bounds, constraints=cons)
            if res.success:
                return pd.Series(res.x, index=tickers)
        except:
            return None
        return None

    print("... Optimizing for Zero Beta Exposure ...")
    w_final_mn = optimize_beta_neutral(active_longs, active_shorts, s_betas)

    # 5. PRINT THE "CHEAT SHEET"
    if w_final_mn is not None:
        print("\n" + "="*80)
        print(f"{'🛒 OFFICIAL BETA-NEUTRAL ORDERS (VALID FOR 1 MONTH) 🛒':^80}")
        print("="*80)
        print(f"STRATEGY: Risk-Balanced Hedge. If Market Crashes, Beta Exposure ~ 0.")
        print("-" * 80)
        print(f"{'TICKER':<8} | {'BETA':<5} | {'ALLOCATION':<12} | {'ACTION':<6} | {'SHARES (£10k)'}")
        print("-" * 80)
        
        capital = 10000
        current_prices = prices_stocks.iloc[-1]
        w_clean = w_final_mn[w_final_mn.abs() > 0.001].sort_values(ascending=False)
        
        weighted_beta_sum = 0
        net_dollar_exposure = 0
        
        for t, w in w_clean.items():
            price = current_prices.get(t, 0)
            beta = s_betas.get(t, 1.0)
            
            amt = capital * abs(w)
            shares = round(amt / price, 2) if price > 0 else 0
            
            if w > 0:
                action = "BUY"
                col = "🟢"
            else:
                action = "SELL"
                col = "🔴"
            
            # Tracking stats
            weighted_beta_sum += (w * beta)
            net_dollar_exposure += w
            
            print(f"{t:<8} | {beta:<5.2f} | {abs(w):.2%}      | {action:<6} | {shares} shs {col}")
            
        print("-" * 80)
        print(f"✅ NET BETA EXPOSURE:   {weighted_beta_sum:.4f} (Target: 0.0000)")
        print(f"ℹ️ NET DOLLAR EXPOSURE: {net_dollar_exposure:.2%} (May not be zero!)")
        print("-" * 80)

        # 6. VISUALIZATION
        plt.figure(figsize=(12, 6))
        
        # We plot Weight * Beta to show how the RISK cancels out
        risk_contrib = w_clean * s_betas[w_clean.index]
        colors = ['#00c853' if r > 0 else '#d50000' for r in risk_contrib.values]
        
        plt.bar(w_clean.index, risk_contrib.values, color=colors, alpha=0.8, edgecolor='black')
        plt.axhline(0, color='black')
        plt.title("Risk Contribution (Weight × Beta)\nLook for Equal Areas Above and Below Line", fontsize=12, fontweight='bold')
        plt.ylabel("Beta Contribution")
        plt.tight_layout()
        plt.show()

        # Save Globally
        w_smart_mn = w_clean
       # print("\n✅ Portfolio saved to 'w_smart_mn'. Ready for Audit.")
        
    else:
        print("❌ Optimization Failed. Solvers could not find a Zero-Beta solution.")
================================================================================
              🚀 FINAL PRODUCTION RUN: TRUE BETA-NEUTRAL HEDGING 🚀               
================================================================================
⚙️ EXECUTION PARAMETERS:
   • Analysis Date:       2026-01-21
   • Strategy Type:       BETA NEUTRAL
   • Optimization Target: Maximize Sharpe
   • Constraint:          Sum(Weight * Beta) = 0.0

📥 CANDIDATE SOURCE: ML-Verified Survivors (Block 17)
   • Available Longs:  32
   • Available Shorts: 6

... Calculating Stock Betas vs S&P 500 ...
   • Beta Range: -0.47 to 2.66
... Optimizing for Zero Beta Exposure ...

================================================================================
              🛒 OFFICIAL BETA-NEUTRAL ORDERS (VALID FOR 1 MONTH) 🛒              
================================================================================
STRATEGY: Risk-Balanced Hedge. If Market Crashes, Beta Exposure ~ 0.
--------------------------------------------------------------------------------
TICKER   | BETA  | ALLOCATION   | ACTION | SHARES (£10k)
--------------------------------------------------------------------------------
MNST     | -0.18 | 20.00%      | BUY    | 32.94 shs 🟢
CVS      | 0.07  | 18.47%      | BUY    | 30.94 shs 🟢
IVZ      | 1.59  | 17.24%      | BUY    | 82.9 shs 🟢
HII      | 0.77  | 10.32%      | BUY    | 3.33 shs 🟢
HAS      | 0.98  | 8.51%      | BUY    | 13.41 shs 🟢
INTC     | 1.74  | 7.30%      | BUY    | 20.17 shs 🟢
COR      | -0.14 | 6.96%      | BUY    | 2.66 shs 🟢
DG       | 0.35  | 6.27%      | BUY    | 5.74 shs 🟢
RTX      | 0.60  | 5.63%      | BUY    | 3.85 shs 🟢
ALB      | 1.32  | 4.42%      | BUY    | 3.44 shs 🟢
NEM      | 0.71  | 3.82%      | BUY    | 4.31 shs 🟢
EL       | 1.24  | 3.64%      | BUY    | 4.27 shs 🟢
INCY     | 0.52  | 2.09%      | BUY    | 2.73 shs 🟢
CAH      | -0.31 | 1.45%      | BUY    | 0.93 shs 🟢
GOOGL    | 1.34  | 1.33%      | BUY    | 0.56 shs 🟢
LRCX     | 2.66  | 1.32%      | BUY    | 0.8 shs 🟢
IDXX     | 1.64  | 0.65%      | BUY    | 0.13 shs 🟢
FDS      | 0.26  | 8.27%      | SELL   | 4.01 shs 🔴
XYZ      | 1.93  | 10.23%      | SELL   | 21.91 shs 🔴
PYPL     | 1.42  | 16.37%      | SELL   | 39.89 shs 🔴
SWKS     | 1.61  | 17.37%      | SELL   | 40.6 shs 🔴
DVA      | 0.29  | 20.00%      | SELL   | 25.82 shs 🔴
--------------------------------------------------------------------------------
✅ NET BETA EXPOSURE:   -0.0000 (Target: 0.0000)
ℹ️ NET DOLLAR EXPOSURE: 47.19% (May not be zero!)
--------------------------------------------------------------------------------
No description has been provided for this image

1-YEAR HISTORICAL AUDIT (MARKET NEUTRAL EDITION)¶

In [27]:
# --- BLOCK 22: 1-YEAR HISTORICAL AUDIT (MARKET NEUTRAL EDITION) ---
print("\n" + "="*80)
print(f"{'LONG-SHORT PORTFOLIO: HEDGE FUND RISK METRICS':^80}")
print("="*80)

# 1. SETUP: DYNAMIC DATES (LAST 1 YEAR)
if 'prices_stocks' in locals() and not prices_stocks.empty:
    end_date = prices_stocks.index[-1]
    start_date = end_date - pd.DateOffset(years=1)
    if start_date < prices_stocks.index[0]:
        start_date = prices_stocks.index[0]
else:
    print("⚠️ Error: Price data not found.")
    end_date = pd.Timestamp.now()
    start_date = end_date - pd.DateOffset(years=1)

print(f"📅 AUDIT PERIOD: {start_date.date()} -> {end_date.date()}")

# 2. RETRIEVE HEDGED WEIGHTS
if 'w_smart_mn' in locals() and w_smart_mn is not None:
    weights = w_smart_mn
    print(f"   • Portfolio: Long-Short Market Neutral ({len(weights)} Positions)")
else:
    print("⚠️ Hedged weights not found. Run Block 21 (Hedged) first.")
    # Fallback: Dummy Hedge
    weights = pd.Series([0.1, -0.1], index=prices_stocks.columns[:2]) 

# 3. CALCULATE DATA
hist_prices = prices_stocks.loc[start_date:end_date]
hist_rets = hist_prices.pct_change().fillna(0)

# Portfolio Returns (Spread + Cash Interest)
# Note: Since weights sum to 0, we add the Risk Free Rate to simulate interest on capital
valid_assets = [t for t in weights.index if t in hist_rets.columns]
aligned_w = weights[valid_assets].reindex(hist_rets.columns).fillna(0)

# Pure Alpha Return (The Spread)
spread_daily = hist_rets.dot(aligned_w)

# Total Return (Spread + Risk Free Rate)
# We use 4% as the standard Risk Free Rate
rf_fixed = last_rf
rf_daily = rf_fixed / 252
port_daily = spread_daily + rf_daily
equity_curve = 10000 * (1 + port_daily).cumprod()

# Benchmark Returns (S&P 500)
try:
    spy_data = yf.download("SPY", start=start_date, end=end_date, progress=False, auto_adjust=True)
    if isinstance(spy_data.columns, pd.MultiIndex):
        spy_close = spy_data.xs('Close', axis=1, level=0).iloc[:, 0]
    elif 'Close' in spy_data.columns:
        spy_close = spy_data['Close']
    else:
        spy_close = spy_data.iloc[:, 0]
    
    spy_close = spy_close.reindex(hist_prices.index).ffill()
    spy_daily = spy_close.pct_change().fillna(0)
    spy_curve = 10000 * (1 + spy_daily).cumprod()
    has_spy = True
except Exception as e:
    print(f"⚠️ Benchmark Warning: {e}")
    has_spy = False
    spy_daily = pd.Series(0, index=hist_prices.index)
    spy_curve = pd.Series(10000, index=hist_prices.index)

# 4. CALCULATE METRICS
rf = rf_fixed

# A. Basic Stats
port_total_ret = (equity_curve.iloc[-1] / 10000) - 1
spy_total_ret = (spy_curve.iloc[-1] / 10000) - 1

port_vol = port_daily.std() * np.sqrt(252)
spy_vol = spy_daily.std() * np.sqrt(252)

# Max Drawdown
port_dd = (equity_curve / equity_curve.cummax()) - 1
port_max_dd = port_dd.min()
spy_dd = (spy_curve / spy_curve.cummax()) - 1
spy_max_dd = spy_dd.min()

# B. Advanced Stats
if has_spy:
    covariance = port_daily.cov(spy_daily)
    variance = spy_daily.var()
    beta = covariance / variance
    correlation = port_daily.corr(spy_daily)
    alpha_jensen = port_total_ret - (rf + beta * (spy_total_ret - rf))
    
    if abs(beta) > 0.01:
        treynor = (port_total_ret - rf) / beta
    else:
        # For Market Neutral, Treynor blows up if Beta is 0. 
        # We use simple Sharpe instead.
        treynor = 0.0 
        
    sharpe = (port_total_ret - rf) / port_vol if port_vol > 0 else 0
    spy_sharpe = (spy_total_ret - rf) / spy_vol if spy_vol > 0 else 0
else:
    beta, correlation, alpha_jensen, treynor, sharpe = 0, 0, 0, 0, 0
    spy_sharpe = 0

# 5. REPORT TABLE
print("\n" + "="*80)
print(f"{'📊 HEDGE FUND PERFORMANCE REPORT (1 YEAR) 📊':^80}")
print("="*80)
print(f"{'METRIC':<25} | {'HEDGED PORTFOLIO':<18} | {'S&P 500 (BENCHMARK)'}")
print("-" * 80)
# Basics
print(f"{'Total Return':<25} | {port_total_ret:+.2%}           | {spy_total_ret:+.2%}")
print(f"{'Annual Volatility':<25} | {port_vol:.2%}             | {spy_vol:.2%}")
print(f"{'Max Drawdown':<25} | {port_max_dd:.2%}             | {spy_max_dd:.2%}")
print("-" * 80)
# Risk-Adjusted
print(f"{'Sharpe Ratio':<25} | {sharpe:.2f}               | {spy_sharpe:.2f}")
print(f"{'Jensen\'s Alpha':<25} | {alpha_jensen:+.2%}           | --")
print("-" * 80)
# Market Relationship
print(f"{'Beta':<25} | {beta:.2f}               | 1.00")
print(f"{'Correlation':<25} | {correlation:.2f}               | 1.00")
print("=" * 80)

# 6. EXPLANATION (ADAPTED FOR HEDGING)
print("\n📝 METRIC DECODER (MARKET NEUTRAL EDITION):")
if abs(beta) < 0.3:
    print(f"✅ BETA ({beta:.2f}): SUCCESS. Your portfolio is 'Uncorrelated'.")
    print("   It ignores market crashes and moves based on your stock picks.")
else:
    print(f"⚠️ BETA ({beta:.2f}): WARNING. Your hedge is leaking.")
    print("   You are still too correlated with the general market.")

# 7. VISUALIZATION
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True, gridspec_kw={'height_ratios': [3, 1]})

# Top Chart: Growth
ax1.plot(equity_curve, color='#6200ea', linewidth=2, label='Long-Short Hedge')
if has_spy:
    ax1.plot(spy_curve, color='gray', linestyle='--', label='S&P 500', alpha=0.5)
ax1.set_title(f"Hedged Growth vs Market (Last 12 Months)", fontsize=12, fontweight='bold')
ax1.set_ylabel("Portfolio Value (£)")
ax1.legend(loc="upper left")
ax1.grid(True, alpha=0.3)

# Bottom Chart: Drawdown
ax2.fill_between(port_dd.index, port_dd, 0, color='#d50000', alpha=0.3, label='Hedge Drawdown')
if has_spy:
    ax2.plot(spy_dd, color='gray', linestyle='--', linewidth=1, alpha=0.5, label='SPY Drawdown')

ax2.set_title(f"Risk Profile (Max Drawdown: {port_max_dd:.2%})", fontsize=10, fontweight='bold')
ax2.set_ylabel("Drop from Peak")
ax2.grid(True, alpha=0.3)
ax2.set_xlabel("Date")

plt.tight_layout()
plt.show()
================================================================================
                 LONG-SHORT PORTFOLIO: HEDGE FUND RISK METRICS                  
================================================================================
📅 AUDIT PERIOD: 2025-01-21 -> 2026-01-21
   • Portfolio: Long-Short Market Neutral (22 Positions)

================================================================================
                   📊 HEDGE FUND PERFORMANCE REPORT (1 YEAR) 📊                   
================================================================================
METRIC                    | HEDGED PORTFOLIO   | S&P 500 (BENCHMARK)
--------------------------------------------------------------------------------
Total Return              | +161.70%           | +13.68%
Annual Volatility         | 14.68%             | 18.92%
Max Drawdown              | -5.40%             | -18.76%
--------------------------------------------------------------------------------
Sharpe Ratio              | 11.01               | 0.72
Jensen's Alpha            | +160.79%           | --
--------------------------------------------------------------------------------
Beta                      | 0.07               | 1.00
Correlation               | 0.08               | 1.00
================================================================================

📝 METRIC DECODER (MARKET NEUTRAL EDITION):
✅ BETA (0.07): SUCCESS. Your portfolio is 'Uncorrelated'.
   It ignores market crashes and moves based on your stock picks.
No description has been provided for this image
In [ ]:
 

SINGLE INDEX MODEL (SIM): SEPARATING SKILL (ALPHA) FROM LUCK (BETA)¶

In [28]:
# --- BLOCK 24 (CORRECTED): THE 3-WAY STRATEGY DIAGNOSTIC (GBP BENCHMARK) ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as sco
import statsmodels.api as sm
import yfinance as yf

print("\n" + "="*80)
print(f"{'LABORATORY: COMPARING 3 PORTFOLIO ARCHITECTURES (GBP BASE)':^80}")
print("="*80)

# 1. SETUP & DATA COLLECTION
# A. Get Candidates
active_longs = []
active_shorts = []

if 'survivor_longs' in locals() and survivor_longs:
    active_longs = survivor_longs
    active_shorts = survivor_shorts if 'survivor_shorts' in locals() else []
    print(f"✅ Loaded Survivors: {len(active_longs)} Longs, {len(active_shorts)} Shorts")
else:
    print("⚠️ No survivors found. Using raw Momentum scan.")
    mom = (prices_stocks.iloc[-1] / prices_stocks.iloc[-252]) - 1
    active_longs = mom.nlargest(15).index.tolist()
    active_shorts = mom.nsmallest(15).index.tolist()

# B. Define Window (Last 12 Months)
end_date = prices_stocks.index[-1]
start_date = end_date - pd.DateOffset(years=1)
if start_date < prices_stocks.index[0]: start_date = prices_stocks.index[0]

# C. CONSTRUCT GBP BENCHMARK (The "Apple to Apple" Fix)
# We download S&P 500 in USD and convert it using your Block 1 FX rates
print("... Constructing S&P 500 (GBP) Benchmark ...")
sp500_usd = yf.download("^GSPC", start=start_date, end=end_date, progress=False, auto_adjust=True)
if isinstance(sp500_usd.columns, pd.MultiIndex): sp500_usd = sp500_usd.xs('Close', axis=1, level=0).iloc[:, 0]
else: sp500_usd = sp500_usd['Close'] if 'Close' in sp500_usd.columns else sp500_usd.iloc[:, 0]

# Align FX rates to the SP500 dates
# We use the 'fx_rates' variable from Block 1
if 'fx_rates' in locals():
    fx_aligned = fx_rates.reindex(sp500_usd.index).ffill()
    sp500_gbp = sp500_usd / fx_aligned
else:
    print("⚠️ FX Rates not found. Using USD Benchmark.")
    sp500_gbp = sp500_usd

# Calculate Returns
mkt_ret = sp500_gbp.pct_change().fillna(0)
stk_ret = prices_stocks.loc[start_date:end_date].pct_change().fillna(0)

# 2. HELPER: CALCULATE ASSET BETAS (GBP vs GBP)
print("... Calculating Asset Betas (GBP Basis) ...")

# Align market and stock returns for beta calc
aligned_for_beta = pd.concat([stk_ret, mkt_ret], axis=1).dropna()
aligned_mkt = aligned_for_beta.iloc[:, -1] # Last column is market
aligned_stk = aligned_for_beta.iloc[:, :-1] # Rest are stocks

mkt_var = aligned_mkt.var()
asset_betas = {}

for t in (active_longs + active_shorts):
    if t in aligned_stk.columns:
        # Correlation of Stock(GBP) vs Market(GBP)
        cov = aligned_stk[t].cov(aligned_mkt)
        asset_betas[t] = cov / mkt_var if mkt_var > 0 else 1.0
s_betas = pd.Series(asset_betas)

# 3. GENERATE THE 3 PORTFOLIOS
def quick_optimize(mode):
    tickers = list(set(active_longs + active_shorts))
    if mode == "LONG_ONLY": tickers = active_longs 
    
    # Inputs (Last 6 months)
    sub_ret = stk_ret[tickers].iloc[-126:]
    mu = sub_ret.mean() * 252
    cov = sub_ret.cov() * 252
    rf = 0.04
    n = len(tickers)
    
    def neg_sharpe(w):
        r = np.sum(w*mu)
        v = np.sqrt(np.dot(w.T, np.dot(cov, w)))
        return -(r-rf)/v if v>0 else 0

    weights_sum = 1.0 if mode == "LONG_ONLY" else 0.0
    
    # Constraints
    cons = [{'type': 'eq', 'fun': lambda x: np.sum(x) - weights_sum}]
    
    # Add Beta Constraint for Strategy C
    if mode == "BETA_NEUTRAL":
        aligned_betas = s_betas[tickers].values
        cons.append({'type': 'eq', 'fun': lambda x: np.sum(x * aligned_betas) - 0})

    # Bounds
    bounds = []
    init_guess = []
    n_long = len([t for t in tickers if t in active_longs])
    n_short = len([t for t in tickers if t in active_shorts])
    
    for t in tickers:
        if mode == "LONG_ONLY":
            bounds.append((0.0, 0.20))
            init_guess.append(1.0/n)
        else: 
            if t in active_shorts:
                bounds.append((-0.20, 0.0))
                init_guess.append(-0.5/n_short if n_short>0 else 0)
            else:
                bounds.append((0.0, 0.20))
                init_guess.append(0.5/n_long if n_long>0 else 0)

    try:
        res = sco.minimize(neg_sharpe, init_guess, method='SLSQP', bounds=bounds, constraints=cons)
        return pd.Series(res.x, index=tickers)
    except:
        return None

print("... Building Strategy A: Long Only ...")
w_lo = quick_optimize("LONG_ONLY")

print("... Building Strategy B: Dollar Neutral (Cash Balanced) ...")
w_dn = quick_optimize("DOLLAR_NEUTRAL")

print("... Building Strategy C: Beta Neutral (Risk Balanced) ...")
w_bn = quick_optimize("BETA_NEUTRAL")

portfolios = {
    "Long Only": w_lo,
    "Dollar Neutral": w_dn,
    "Beta Neutral": w_bn
}

# 4. RUN REGRESSION DIAGNOSTIC
print("\n" + "-"*80)
print(f"{'REGRESSION RESULTS (1 YEAR LOOKBACK)':^80}")
print("-" * 80)
print(f"{'STRATEGY':<20} | {'BETA':<8} | {'ALPHA (Ann)':<12} | {'R-SQUARED':<10} | {'VERDICT'}")
print("-" * 80)

results_store = {}

for name, weights in portfolios.items():
    if weights is None: continue
    
    # Calc Return
    valid_assets = [t for t in weights.index if t in stk_ret.columns]
    port_ret = stk_ret[valid_assets].dot(weights[valid_assets])
    
    if "Neutral" in name: port_ret += (0.04/252) # Add Cash Interest
        
    # Prepare Excess Returns
    excess_port = port_ret - (0.04/252)
    excess_mkt = mkt_ret - (0.04/252)
    
    # --- CRITICAL FIX: FORCE ALIGNMENT ---
    # Merge and Drop NaNs to ensure strict index match for statsmodels
    reg_data = pd.concat([excess_port, excess_mkt], axis=1).dropna()
    reg_data.columns = ['Portfolio', 'Market']
    
    if len(reg_data) < 20:
        print(f"{name:<20} | INSUFFICIENT DATA POINTS FOR REGRESSION")
        continue

    y_reg = reg_data['Portfolio']
    X_reg = sm.add_constant(reg_data['Market'])
    
    model = sm.OLS(y_reg, X_reg).fit()
    
    alpha = model.params['const'] * 252
    beta = model.params['Market'] # Robust way to get slope
    r2 = model.rsquared
    
    results_store[name] = {'beta': beta, 'alpha': alpha, 'r2': r2, 'data': y_reg, 'market': reg_data['Market']}
    
    verdict = "Market Risk" if r2 > 0.5 else "Pure Alpha"
    if abs(beta) < 0.15 and "Neutral" in name: verdict = "True Hedge ✅"
    if abs(beta) > 0.30 and "Neutral" in name: verdict = "Leaky Hedge ⚠️"
    
    print(f"{name:<20} | {beta:<8.2f} | {alpha:<12.2%} | {r2:<10.2f} | {verdict}")

print("-" * 80)

# 5. VISUALIZATION
fig, axes = plt.subplots(1, 3, figsize=(18, 5), sharey=True)

for i, (name, stats) in enumerate(results_store.items()):
    ax = axes[i]
    y_vals = stats['data']
    x_vals = stats['market']
    
    ax.scatter(x_vals, y_vals, alpha=0.4, color='#2962ff')
    
    x_line = np.linspace(x_vals.min(), x_vals.max(), 100)
    y_line = (stats['alpha']/252) + stats['beta'] * x_line
    ax.plot(x_line, y_line, color='red', linewidth=2)
    
    ax.set_title(f"{name}\nBeta: {stats['beta']:.2f} | R2: {stats['r2']:.2f}")
    ax.set_xlabel("GBP Market Returns")
    ax.axhline(0, color='black', alpha=0.3)
    ax.axvline(0, color='black', alpha=0.3)
    if i == 0: ax.set_ylabel("Portfolio Returns")

plt.tight_layout()
plt.show()

print("\n💡 KEY TAKEAWAY:")
print("Comparing against the GBP Benchmark removes currency noise.")
print("Beta values now reflect TRUE stock sensitivity, not exchange rate fluctuations.")
================================================================================
           LABORATORY: COMPARING 3 PORTFOLIO ARCHITECTURES (GBP BASE)           
================================================================================
✅ Loaded Survivors: 32 Longs, 6 Shorts
... Constructing S&P 500 (GBP) Benchmark ...
... Calculating Asset Betas (GBP Basis) ...
... Building Strategy A: Long Only ...
... Building Strategy B: Dollar Neutral (Cash Balanced) ...
... Building Strategy C: Beta Neutral (Risk Balanced) ...

--------------------------------------------------------------------------------
                      REGRESSION RESULTS (1 YEAR LOOKBACK)                      
--------------------------------------------------------------------------------
STRATEGY             | BETA     | ALPHA (Ann)  | R-SQUARED  | VERDICT
--------------------------------------------------------------------------------
Long Only            | 0.63     | 58.27%       | 0.48       | Pure Alpha
Dollar Neutral       | 0.02     | 91.41%       | 0.00       | True Hedge ✅
Beta Neutral         | 0.00     | 93.48%       | -0.00      | True Hedge ✅
--------------------------------------------------------------------------------
No description has been provided for this image
💡 KEY TAKEAWAY:
Comparing against the GBP Benchmark removes currency noise.
Beta values now reflect TRUE stock sensitivity, not exchange rate fluctuations.

##################################################################################################################################################################################

This block is the Efficiency Scorecard.¶

It tells you if you are squeezing all the juice out of your stock picks, or if you are leaving profit on the table for the sake of safety. A sharpe ratio higher than the THEORETICAL portfolio would mean that the startegy 1 - 2 delivers more risk premium for unit of risk, on the other hand a lower Sharpe ratio would indicate that a more efficient portfolio exist (the reason could be that the weight constrains play a role in this of course - but I think diversification in securities selection is important too).

In [29]:
# --- BLOCK 25: PORTFOLIO DETAIL REPORT & THEORETICAL OPTIMUM (SMART) ---
import pandas as pd
import numpy as np
import scipy.optimize as sco

print("\n" + "="*80)
print(f"{'PORTFOLIO COMPOSITION & EFFICIENT FRONTIER CHECK':^80}")
print("="*80)

# 1. SMART WEIGHT SELECTION (Pick the Best Available)
# Strategy 1 (Long Only)
if 'w_smart_lo' in locals() and w_smart_lo is not None:
    w_report_lo = w_smart_lo
    name_lo = "Strategy 1: ML-Enhanced (Long Only)"
else:
    w_report_lo = None
    name_lo = "Strategy 1: Not Found"

# Strategy 2 (Market Neutral)
if 'w_smart_mn' in locals() and w_smart_mn is not None:
    w_report_mn = w_smart_mn
    name_mn = "Strategy 2: ML-Hedged (Market Neutral)"
else:
    w_report_mn = None
    name_mn = "Strategy 2: Not Found"

# 2. SETUP UNIVERSE (Combine Assets from Both)
active_assets = set()
if w_report_lo is not None: active_assets.update(w_report_lo.index)
if w_report_mn is not None: active_assets.update(w_report_mn.index)
active_assets = list(active_assets)

if not active_assets:
    print("⚠️ No assets found to analyze. Please run optimization blocks 19 & 21 first.")
else:
    # 3. GET STATS FOR OPTIMIZATION (Recent Regime: Last 3 Months)
    # We use recent history to judge "current" optimality
    prices_audit = prices_stocks[active_assets].iloc[-63:] 
    rets_audit = np.log(prices_audit / prices_audit.shift(1)).dropna()
    mu_audit = rets_audit.mean() * 252
    cov_audit = rets_audit.cov() * 252
    rf = 0.04 

    # 4. HELPER: PRINT DETAILS
    def print_portfolio_report(weights, name):
        if weights is None or weights.empty:
            print(f"⚠️ {name}: No weights found.")
            return

        # Align weights to current universe (fill missing with 0)
        valid_w = [t for t in weights.index if t in prices_audit.columns]
        w = weights[valid_w].reindex(active_assets).fillna(0)
        
        # Calc Stats
        p_ret = np.sum(w * mu_audit)
        p_vol = np.sqrt(np.dot(w.T, np.dot(cov_audit, w)))
        
        if p_vol > 0.0001:
            p_sharpe = (p_ret - rf) / p_vol
        else:
            p_sharpe = 0.0
        
        print(f"\n>>> {name.upper()}")
        print("-" * 60)
        print(f"Expected Return:  {p_ret:.2%}")
        print(f"Annual Risk:      {p_vol:.2%}")
        print(f"Sharpe Ratio:     {p_sharpe:.2f}")
        print("-" * 60)
        
        # Show Top Positions
        df = pd.DataFrame({'Weight': w})
        df = df[df['Weight'].abs() > 0.01].sort_values('Weight', ascending=False)
        
        # Print format nicely
        print(df.head(10).applymap(lambda x: f"{x:.2%}").to_string())
        print("-" * 60)

    # 5. REPORT ON EXISTING STRATEGIES
    if w_report_lo is not None:
        print_portfolio_report(w_report_lo, name_lo)

    if w_report_mn is not None:
        print_portfolio_report(w_report_mn, name_mn)

    # 6. CALCULATE THEORETICAL "MAX SHARPE" (The Efficient Frontier Tangency)
    # This ignores your 20% constraints and Long/Short rules to find pure mathematical perfection
    print("\n>>> CALCULATING THEORETICAL MAXIMUM SHARPE (UNCONSTRAINED)...")

    num_assets = len(active_assets)
    def neg_sharpe(w):
        p_ret = np.sum(w * mu_audit)
        p_vol = np.sqrt(np.dot(w.T, np.dot(cov_audit, w)))
        if p_vol < 1e-6: return 0
        return -(p_ret - rf) / p_vol

    # Constraints: Sum=1, Long Only (0,1) for simplicity of comparison
    cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    bounds = tuple((0, 1) for _ in range(num_assets))

    try:
        # Start with equal weights
        init_guess = num_assets * [1./num_assets]
        res_tan = sco.minimize(neg_sharpe, init_guess, method='SLSQP', bounds=bounds, constraints=cons)

        if res_tan.success:
            w_tangency = pd.Series(res_tan.x, index=active_assets)
            # Filter dust
            w_tangency = w_tangency[w_tangency > 0.001]
            print_portfolio_report(w_tangency, "Strategy 3: Theoretical Max Sharpe (Benchmark)")
            
            # INSIGHT GENERATION
            print("\n💡 INSIGHT: THE COST OF CONSTRAINTS")
            print("Compare Strategy 3 (Theoretical) vs Strategy 1 (Real World).")
            print("If Strategy 3 has a MUCH higher Sharpe, it means your constraints (max 20%, etc.)")
            print("are protecting you from risk, but costing you significant upside.")
            
        else:
            print("⚠️ Could not converge on Max Sharpe portfolio.")
            
    except Exception as e:
        print(f"⚠️ Optimization Error: {e}")
================================================================================
                PORTFOLIO COMPOSITION & EFFICIENT FRONTIER CHECK                
================================================================================

>>> STRATEGY 1: ML-ENHANCED (LONG ONLY)
------------------------------------------------------------
Expected Return:  86.54%
Annual Risk:      14.79%
Sharpe Ratio:     5.58
------------------------------------------------------------
       Weight
MNST   20.00%
CVS    16.70%
HII    13.20%
DG     10.02%
GOOGL   8.88%
INTC    7.23%
NEM     6.49%
CAH     4.93%
ALB     4.00%
STX     2.68%
------------------------------------------------------------

>>> STRATEGY 2: ML-HEDGED (MARKET NEUTRAL)
------------------------------------------------------------
Expected Return:  146.40%
Annual Risk:      14.86%
Sharpe Ratio:     9.58
------------------------------------------------------------
      Weight
MNST  20.00%
CVS   18.47%
IVZ   17.24%
HII   10.32%
HAS    8.51%
INTC   7.30%
COR    6.96%
DG     6.27%
RTX    5.63%
ALB    4.42%
------------------------------------------------------------

>>> CALCULATING THEORETICAL MAXIMUM SHARPE (UNCONSTRAINED)...
C:\Users\Pierpaolo Mirizi\AppData\Local\Temp\ipykernel_34188\3612348975.py:75: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.
  print(df.head(10).applymap(lambda x: f"{x:.2%}").to_string())
C:\Users\Pierpaolo Mirizi\AppData\Local\Temp\ipykernel_34188\3612348975.py:75: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.
  print(df.head(10).applymap(lambda x: f"{x:.2%}").to_string())
>>> STRATEGY 3: THEORETICAL MAX SHARPE (BENCHMARK)
------------------------------------------------------------
Expected Return:  127.40%
Annual Risk:      16.98%
Sharpe Ratio:     7.27
------------------------------------------------------------
      Weight
MNST  21.53%
CMI   21.29%
DG    19.77%
ALB   12.01%
HII   11.11%
CAH    8.59%
NEM    5.70%
------------------------------------------------------------

💡 INSIGHT: THE COST OF CONSTRAINTS
Compare Strategy 3 (Theoretical) vs Strategy 1 (Real World).
If Strategy 3 has a MUCH higher Sharpe, it means your constraints (max 20%, etc.)
are protecting you from risk, but costing you significant upside.
C:\Users\Pierpaolo Mirizi\AppData\Local\Temp\ipykernel_34188\3612348975.py:75: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.
  print(df.head(10).applymap(lambda x: f"{x:.2%}").to_string())
In [30]:
# --- BLOCK 26: PORTFOLIO REPORT CARD (DUAL STRATEGY COMPARISON) ---
import pandas as pd
import numpy as np
import scipy.optimize as sco
import matplotlib.pyplot as plt

print("\n" + "="*80)
print(f"{'PORTFOLIO REPORT CARD: REALITY VS. MATH PERFECTION':^80}")
print("="*80)

# 1. IDENTIFY AVAILABLE STRATEGIES
strategies_to_test = {}

# Strategy 1 (Long Only)
if 'w_smart_lo' in locals() and w_smart_lo is not None:
    strategies_to_test["Strategy 1 (Smart Long)"] = w_smart_lo
elif 'w_case_lo' in locals() and w_case_lo is not None:
    strategies_to_test["Strategy 1 (Base Long)"] = w_case_lo

# Strategy 2 (Market Neutral)
if 'w_smart_mn' in locals() and w_smart_mn is not None:
    strategies_to_test["Strategy 2 (Smart Hedge)"] = w_smart_mn
elif 'w_case_mn' in locals() and w_case_mn is not None:
    strategies_to_test["Strategy 2 (Base Hedge)"] = w_case_mn

if not strategies_to_test:
    print("⚠️ No portfolios found. Please run optimization blocks (19 or 21) first.")
else:
    # 2. SETUP UNIVERSE (Union of all assets)
    active_assets = set()
    for w in strategies_to_test.values():
        active_assets.update(w.index)
    active_assets = list(active_assets)
    
    # Get Stats (Last 3 Months / 63 Days)
    if 'prices_stocks' in locals():
        prices_audit = prices_stocks[active_assets].iloc[-63:]
        rets_audit = np.log(prices_audit / prices_audit.shift(1)).dropna()
        mu_audit = rets_audit.mean() * 252
        cov_audit = rets_audit.cov() * 252
        rf = 0.04 
    else:
        print("❌ Error: Price data missing.")
        active_assets = []

    if active_assets:
        # 3. CALCULATE THEORETICAL MAX SHARPE (The "Perfect" Benchmark)
        # We calculate the unconstrained Long-Only Tangency portfolio as the gold standard
        print("... Calculating Theoretical Maximum Efficiency ...")
        num_assets = len(active_assets)
        
        def neg_sharpe(w):
            p_ret = np.sum(w * mu_audit)
            p_vol = np.sqrt(np.dot(w.T, np.dot(cov_audit, w)))
            if p_vol < 1e-6: return 0
            return -(p_ret - rf) / p_vol

        cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
        bounds = tuple((0, 1) for _ in range(num_assets))
        init_guess = num_assets * [1./num_assets]

        res_tan = sco.minimize(neg_sharpe, init_guess, method='SLSQP', bounds=bounds, constraints=cons)
        
        if res_tan.success:
            w_tangency = pd.Series(res_tan.x, index=active_assets)
            w_tangency = w_tangency[w_tangency > 0.001] # Filter dust
        else:
            w_tangency = pd.Series(dtype=float)

        # 4. HELPER: STATS CALCULATOR
        def get_stats(weights):
            valid_w = [t for t in weights.index if t in prices_audit.columns]
            w = weights[valid_w].reindex(active_assets).fillna(0)
            p_ret = np.sum(w * mu_audit)
            p_vol = np.sqrt(np.dot(w.T, np.dot(cov_audit, w)))
            p_sharpe = (p_ret - rf) / p_vol if p_vol > 0 else 0
            return p_ret, p_vol, p_sharpe

        # 5. EXECUTE COMPARISONS
        for name, strat_weights in strategies_to_test.items():
            print("\n" + "-"*80)
            print(f"🔎 AUDIT: {name.upper()}")
            print("-" * 80)
            
            # A. Metrics Table
            r_strat, v_strat, s_strat = get_stats(strat_weights)
            r_ideal, v_ideal, s_ideal = get_stats(w_tangency)
            
            print(f"{'METRIC':<20} | {'YOUR STRATEGY':<15} | {'THEORETICAL MAX':<15} | {'EFFICIENCY'}")
            print("-" * 80)
            print(f"{'Annual Return':<20} | {r_strat:<15.2%} | {r_ideal:<15.2%} | {r_strat/r_ideal:.1%}")
            print(f"{'Sharpe Ratio':<20} | {s_strat:<15.2f} | {s_ideal:<15.2f} | {s_strat/s_ideal:.1%}")
            print("-" * 80)
            
            if s_strat/s_ideal > 0.8:
                print("✅ VERDICT: Highly Efficient. You are capturing most of the available alpha.")
            else:
                print("⚠️ VERDICT: Constrained. Safety rules are costing significant potential return.")

            # B. Visualization
            combined = pd.DataFrame({
                'My Strategy': strat_weights,
                'Theoretical Max': w_tangency
            }).fillna(0)
            
            # Filter for plot clarity (>2% only)
            plot_data = combined[(combined['My Strategy'] > 0.02) | (combined['Theoretical Max'] > 0.02)]
            
            if not plot_data.empty:
                plot_data = plot_data.sort_values('My Strategy', ascending=False)
                
                # Color coding based on strategy type
                col_strat = '#d50000' if "Hedge" in name else '#00c853' # Red for Hedge, Green for Long
                
                ax = plot_data.plot(kind='bar', figsize=(14, 6), color=[col_strat, '#2962ff'], width=0.8)
                
                plt.title(f"{name} vs. Mathematical Perfection", fontsize=14, fontweight='bold')
                plt.ylabel("Allocation (%)")
                plt.grid(axis='y', linestyle='--', alpha=0.3)
                plt.xticks(rotation=45, ha='right')
                plt.legend()
                
                # Labels
                for p in ax.patches:
                    if p.get_height() > 0.01: # Only label big bars
                        ax.annotate(f"{p.get_height():.1%}", 
                                    (p.get_x() + p.get_width() / 2., p.get_height()), 
                                    ha='center', va='bottom', fontsize=8, xytext=(0, 3), textcoords='offset points')
                
                plt.tight_layout()
                plt.show()
            else:
                print("   (No significant positions to plot)")
================================================================================
               PORTFOLIO REPORT CARD: REALITY VS. MATH PERFECTION               
================================================================================
... Calculating Theoretical Maximum Efficiency ...

--------------------------------------------------------------------------------
🔎 AUDIT: STRATEGY 1 (SMART LONG)
--------------------------------------------------------------------------------
METRIC               | YOUR STRATEGY   | THEORETICAL MAX | EFFICIENCY
--------------------------------------------------------------------------------
Annual Return        | 86.54%          | 127.40%         | 67.9%
Sharpe Ratio         | 5.58            | 7.27            | 76.8%
--------------------------------------------------------------------------------
⚠️ VERDICT: Constrained. Safety rules are costing significant potential return.
No description has been provided for this image
--------------------------------------------------------------------------------
🔎 AUDIT: STRATEGY 2 (SMART HEDGE)
--------------------------------------------------------------------------------
METRIC               | YOUR STRATEGY   | THEORETICAL MAX | EFFICIENCY
--------------------------------------------------------------------------------
Annual Return        | 146.40%         | 127.40%         | 114.9%
Sharpe Ratio         | 9.58            | 7.27            | 131.9%
--------------------------------------------------------------------------------
✅ VERDICT: Highly Efficient. You are capturing most of the available alpha.
No description has been provided for this image
In [31]:
# --- BLOCK 28: BEAR MARKET STRESS TEST (MONTE CARLO SIMULATION) ---
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

print("\n" + "="*80)
print(f"{'STRESS TEST: PROBABILISTIC RISK FORECAST (ZOOMED)':^80}")
print("="*80)

# 1. INTELLIGENT PORTFOLIO SELECTION
strats_to_test = {}

# Strategy 1 (Long Only)
if 'w_smart_lo' in locals() and w_smart_lo is not None:
    strats_to_test['Strategy 1 (Smart Long)'] = {'w': w_smart_lo, 'c': '#00c853'} # Green
elif 'w_case_lo' in locals() and w_case_lo is not None:
    strats_to_test['Strategy 1 (Base Long)'] = {'w': w_case_lo, 'c': '#00c853'}

# Strategy 2 (Market Neutral)
if 'w_smart_mn' in locals() and w_smart_mn is not None:
    strats_to_test['Strategy 2 (Smart Hedge)'] = {'w': w_smart_mn, 'c': '#d50000'} # Red
elif 'w_case_mn' in locals() and w_case_mn is not None:
    strats_to_test['Strategy 2 (Base Hedge)'] = {'w': w_case_mn, 'c': '#d50000'}

# Benchmark (Theoretical Max)
if 'w_tangency' in locals() and not w_tangency.empty:
    strats_to_test['Theoretical Max'] = {'w': w_tangency, 'c': '#2962ff'} # Blue

# 2. CONFIGURATION (THE BEAR SCENARIO)
SIMULATIONS = 5000      # Number of parallel futures
TIME_HORIZON = 252      # 1 Year (Trading Days)
INITIAL_CAPITAL = 10000 # Starting money

# The Stress Factors
SENTIMENT_ADJ = 0.5     # Expect 50% less return than history
VOLATILITY_ADJ = 1.5    # Expect 50% more volatility (Panic mode)

# 3. PREPARE DATA ENGINE
active_assets = set()
for s in strats_to_test.values():
    active_assets.update(s['w'].index)
active_assets = list(active_assets)

if not active_assets:
    print("⚠️ No assets found. Run optimization blocks (19/21) first.")
else:
    # Recalculate Stats (Use last 3 months for 'Current Regime')
    prices_sim = prices_stocks[active_assets].iloc[-63:]
    rets_sim = np.log(prices_sim / prices_sim.shift(1)).dropna()
    
    mu_sim = rets_sim.mean() * 252
    sigma_sim = rets_sim.cov() * 252

    print(f"⚙️ Simulation Config: {SIMULATIONS} Paths over 1 Year.")
    print(f"   Scenario: Returns x{SENTIMENT_ADJ}, Volatility x{VOLATILITY_ADJ}")

    # 4. SIMULATION ENGINE (Geometric Brownian Motion)
    def run_bear_simulation(weights, name):
        if weights is None or weights.empty: return None, 0

        # Align weights
        valid_w = [t for t in weights.index if t in rets_sim.columns]
        w_vec = weights[valid_w].reindex(active_assets).fillna(0.0).values
        
        # Portfolio Stats (Historical)
        hist_mu = np.sum(w_vec * mu_sim) 
        hist_vol = np.sqrt(np.dot(w_vec.T, np.dot(sigma_sim, w_vec)))
        
        # Apply Stress Shocks
        sim_mu = hist_mu * SENTIMENT_ADJ
        sim_vol = hist_vol * VOLATILITY_ADJ
        
        # Monte Carlo Core (Vectorized)
        dt = 1/252
        # Random shocks: [Days, Sims]
        z = np.random.normal(0, 1, (TIME_HORIZON, SIMULATIONS))
        
        # Brownian Motion Equation
        drift_step = (sim_mu - 0.5 * sim_vol**2) * dt
        shock_step = sim_vol * np.sqrt(dt) * z
        
        log_ret_steps = drift_step + shock_step
        
        # Construct Paths
        # Insert 0 at start so all paths start at Initial Capital
        log_ret_steps = np.vstack([np.zeros((1, SIMULATIONS)), log_ret_steps])
        cumulative_paths = INITIAL_CAPITAL * np.exp(np.cumsum(log_ret_steps, axis=0))
        
        # Risk Metrics (Value at Risk)
        ending_values = cumulative_paths[-1]
        p5 = np.percentile(ending_values, 5)   # Worst 5% outcome
        
        print(f"{name:<28} | Risk (Vol): {sim_vol:.1%} | Worst Case (95%): £{p5:,.0f}")
        return cumulative_paths, p5

    # 5. EXECUTE SIMULATIONS
    print("-" * 80)
    print(f"{'STRATEGY':<28} | {'STRESS VOL':<12} | {'SURVIVAL (VaR 95%)'}")
    print("-" * 80)
    
    plot_data = {}
    for name, data in strats_to_test.items():
        paths, val = run_bear_simulation(data['w'], name)
        plot_data[name] = {'paths': paths, 'c': data['c']}

    print("-" * 80)

    # 6. VISUALIZATION (PROBABILITY CONES)
    plt.figure(figsize=(14, 8))
    
    # Calculate Zoom Limits (Focus on the 'Likely' outcomes, cut the lucky outliers)
    all_likely_tops = []
    for d in plot_data.values():
        if d['paths'] is not None:
            # We look at the 85th percentile, ignoring the top 15% lucky shots
            all_likely_tops.append(np.percentile(d['paths'][-1], 85))
    
    y_upper = max(max(all_likely_tops) * 1.15, INITIAL_CAPITAL * 1.2) if all_likely_tops else INITIAL_CAPITAL * 1.5
    y_lower = 0 # Can't go below zero

    # Plotting Helper
    def plot_cone(paths, color, label):
        if paths is not None:
            days = range(len(paths))
            # Percentiles across all simulations per day
            p05 = np.percentile(paths, 5, axis=1)
            p25 = np.percentile(paths, 25, axis=1)
            p50 = np.percentile(paths, 50, axis=1) # Median
            p75 = np.percentile(paths, 75, axis=1)
            p95 = np.percentile(paths, 95, axis=1)

            # Outer Cone (90% Probability Interval)
            plt.fill_between(days, p05, p95, color=color, alpha=0.1)
            # Inner Cone (50% Probability Interval - The "Likely" Outcome)
            plt.fill_between(days, p25, p75, color=color, alpha=0.2, label=f'{label} (Likely Range)')
            # Median Line
            plt.plot(days, p50, color=color, linewidth=2)
            # Crash Line (Worst Case)
            plt.plot(days, p05, color=color, linewidth=1, linestyle=':', alpha=0.6)

    for name, data in plot_data.items():
        plot_cone(data['paths'], data['c'], name)

    # Decorate
    plt.axhline(INITIAL_CAPITAL, color='black', linewidth=1.5, linestyle='--', label='Break Even')
    plt.title("Bear Market Stress Test: 1-Year Survival Forecast", fontsize=16, fontweight='bold')
    plt.ylabel("Portfolio Value (£)", fontsize=12)
    plt.xlabel("Trading Days (1 Year)", fontsize=12)
    plt.ylim(y_lower, y_upper) # Apply Smart Zoom
    plt.legend(loc='upper left', fontsize=10)
    plt.grid(True, alpha=0.2)
    
    plt.tight_layout()
    plt.show()

    print("\n💡 HOW TO READ THIS CHART:")
    print("1. SHADED CONES: Represent 5,000 possible futures. The darker area is the 'Likely' outcome.")
    print("2. DOTTED LINES: The 'Worst Case Scenario' (Bottom 5%). If this line stays above £0, you survive.")
    print("3. ZOOM: We cropped the top lucky outliers to focus on DOWNSIDE PROTECTION.")
================================================================================
               STRESS TEST: PROBABILISTIC RISK FORECAST (ZOOMED)                
================================================================================
⚙️ Simulation Config: 5000 Paths over 1 Year.
   Scenario: Returns x0.5, Volatility x1.5
--------------------------------------------------------------------------------
STRATEGY                     | STRESS VOL   | SURVIVAL (VaR 95%)
--------------------------------------------------------------------------------
Strategy 1 (Smart Long)      | Risk (Vol): 22.2% | Worst Case (95%): £10,442
Strategy 2 (Smart Hedge)     | Risk (Vol): 22.3% | Worst Case (95%): £14,140
Theoretical Max              | Risk (Vol): 25.5% | Worst Case (95%): £12,005
--------------------------------------------------------------------------------
No description has been provided for this image
💡 HOW TO READ THIS CHART:
1. SHADED CONES: Represent 5,000 possible futures. The darker area is the 'Likely' outcome.
2. DOTTED LINES: The 'Worst Case Scenario' (Bottom 5%). If this line stays above £0, you survive.
3. ZOOM: We cropped the top lucky outliers to focus on DOWNSIDE PROTECTION.
In [32]:
# --- BLOCK 29: FORWARD-LOOKING STRATEGY MIXER (EFFICIENT FRONTIER) ---
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

print("\n" + "="*80)
print(f"{'ALLOCATION LAB: FINDING THE GOLDEN RATIO':^80}")
print("="*80)

# 1. SETUP: IDENTIFY STRATEGIES
strat_lo = None
strat_mn = None
name_lo = "Long Only"
name_mn = "Market Neutral"

# Detect Long Only
if 'w_smart_lo' in locals() and w_smart_lo is not None:
    strat_lo = w_smart_lo
    name_lo = "Strategy 1 (Smart Long)"
elif 'w_case_lo' in locals() and w_case_lo is not None:
    strat_lo = w_case_lo
    name_lo = "Strategy 1 (Base Long)"

# Detect Market Neutral
if 'w_smart_mn' in locals() and w_smart_mn is not None:
    strat_mn = w_smart_mn
    name_mn = "Strategy 2 (Smart Hedge)"
elif 'w_case_mn' in locals() and w_case_mn is not None:
    strat_mn = w_case_mn
    name_mn = "Strategy 2 (Base Hedge)"

# 2. DATA ENGINE: CALCULATE REAL METRICS
if strat_lo is not None and strat_mn is not None:
    
    # Get recent history (Last 6 Months for robustness)
    assets = list(set(strat_lo.index) | set(strat_mn.index))
    
    # Ensure we have price data
    if 'prices_stocks' in locals():
        prices = prices_stocks[assets].iloc[-126:] # Last 6 months
        rets = np.log(prices / prices.shift(1)).fillna(0)
        
        # Construct Strategy Returns
        w_lo_aligned = strat_lo.reindex(assets).fillna(0)
        w_mn_aligned = strat_mn.reindex(assets).fillna(0)
        
        # Daily returns series
        series_lo = rets.dot(w_lo_aligned)
        series_mn = rets.dot(w_mn_aligned)
        
        # A. Calculate Real Volatility (Annualized)
        vol_lo = series_lo.std() * np.sqrt(252)
        vol_mn = series_mn.std() * np.sqrt(252)
        
        # B. Calculate Real Correlation
        real_corr = series_lo.corr(series_mn)
        
        # C. Define Return Assumptions (Forward Looking Estimates)
        ret_lo = 0.15 
        ret_mn = 0.08 
        rf_rate = 0.04

        print(f"📊 INPUTS (Data-Driven from Last 6 Months):")
        print(f"   > {name_lo:<25} | Risk: {vol_lo:.1%} | Est. Return: {ret_lo:.1%}")
        print(f"   > {name_mn:<25} | Risk: {vol_mn:.1%} | Est. Return: {ret_mn:.1%}")
        print(f"   > Correlation Coefficient:      {real_corr:.2f}")
        
        if real_corr < 0.3:
            print("   ✅ VERDICT: Low Correlation! Great diversification benefit.")
        elif real_corr > 0.7:
            print("   ⚠️ VERDICT: High Correlation. Benefit of mixing is limited.")

        # 3. GENERATE THE EFFICIENT FRONTIER CURVE
        mix_weights = np.linspace(0, 1, 100) # 0% to 100%
        curve_risks = []
        curve_returns = []
        sharpe_ratios = []

        for w in mix_weights:
            # w is weight in Long Only, (1-w) is weight in Hedge
            
            # Portfolio Return
            p_ret = (w * ret_lo) + ((1-w) * ret_mn)
            
            # Portfolio Risk
            p_var = (w**2 * vol_lo**2) + \
                    ((1-w)**2 * vol_mn**2) + \
                    (2 * w * (1-w) * vol_lo * vol_mn * real_corr)
            p_vol = np.sqrt(p_var)
            
            curve_risks.append(p_vol)
            curve_returns.append(p_ret)
            sharpe_ratios.append((p_ret - rf_rate) / p_vol)

        # 4. FIND THE GOLDEN RATIO (Max Sharpe)
        # --- FIX: Use the correct variable name 'sharpe_ratios' ---
        best_idx = np.argmax(sharpe_ratios)
        best_w_lo = mix_weights[best_idx]
        best_w_mn = 1 - best_w_lo
        
        best_ret = curve_returns[best_idx]
        best_vol = curve_risks[best_idx]

        # 5. VISUALIZATION
        plt.figure(figsize=(12, 8))
        
        # Plot Curve
        plt.plot(curve_risks, curve_returns, color='black', linewidth=3, label='Efficient Frontier', alpha=0.7)
        
        # Scatter Points for the Strategies
        plt.scatter(vol_lo, ret_lo, color='#00c853', s=200, marker='o', edgecolors='black', label=name_lo, zorder=5)
        plt.scatter(vol_mn, ret_mn, color='#d50000', s=200, marker='D', edgecolors='black', label=name_mn, zorder=5)
        
        # The Optimal Point
        plt.scatter(best_vol, best_ret, color='gold', s=400, marker='*', edgecolors='black', zorder=10, 
                    label=f'Golden Ratio ({best_w_lo:.0%} / {best_w_mn:.0%})')
        
        # Annotations
        plt.text(vol_lo, ret_lo + 0.005, "Aggressive", ha='center', fontsize=9, color='green')
        plt.text(vol_mn, ret_mn - 0.005, "Defensive", ha='center', fontsize=9, color='red')
        
        # Benefit Arrow
        min_asset_vol = min(vol_lo, vol_mn)
        if best_vol < min_asset_vol:
            plt.annotate("Diversification Alpha\n(Lower Risk than either strategy!)", 
                         xy=(best_vol, best_ret), 
                         xytext=(best_vol - 0.05, best_ret),
                         arrowprops=dict(facecolor='black', shrink=0.05))

        plt.title(f"Optimal Allocation: {name_lo} vs {name_mn}", fontsize=15, fontweight='bold')
        plt.xlabel("Projected Annual Risk (Volatility)", fontsize=12)
        plt.ylabel("Projected Annual Return", fontsize=12)
        plt.grid(True, alpha=0.3)
        plt.legend(loc='upper left', frameon=True)
        
        plt.tight_layout()
        plt.show()

        print("\n" + "="*80)
        print(f"{'🏆 FINAL RECOMMENDATION: THE GOLDEN RATIO 🏆':^80}")
        print("="*80)
        print(f"To maximize risk-adjusted returns (Sharpe Ratio), allocate:")
        print(f"   👉 {best_w_lo:.0%} to Strategy 1 (Long Only)")
        print(f"   👉 {best_w_mn:.0%} to Strategy 2 (Market Neutral)")
        print("-" * 80)
        print(f"Projected Combined Volatility: {best_vol:.1%}")
        print(f"Projected Combined Return:     {best_ret:.1%}")
    else:
        print("⚠️ 'prices_stocks' missing. Cannot calculate real correlation.")
else:
    print("⚠️ Need both Strategy 1 and Strategy 2 portfolios to calculate the optimal mix.")
================================================================================
                    ALLOCATION LAB: FINDING THE GOLDEN RATIO                    
================================================================================
📊 INPUTS (Data-Driven from Last 6 Months):
   > Strategy 1 (Smart Long)   | Risk: 13.0% | Est. Return: 15.0%
   > Strategy 2 (Smart Hedge)  | Risk: 13.7% | Est. Return: 8.0%
   > Correlation Coefficient:      0.61
No description has been provided for this image
================================================================================
                   🏆 FINAL RECOMMENDATION: THE GOLDEN RATIO 🏆                   
================================================================================
To maximize risk-adjusted returns (Sharpe Ratio), allocate:
   👉 100% to Strategy 1 (Long Only)
   👉 0% to Strategy 2 (Market Neutral)
--------------------------------------------------------------------------------
Projected Combined Volatility: 13.0%
Projected Combined Return:     15.0%

#####################################################################################################################################

STRESS TEST: COVID CRASH & RECOVERY (FEB - AUG 2020)¶

In [33]:
# --- BLOCK 29: STRESS TEST (EXPANDED: CRASH & RECOVERY) ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import warnings

# Suppress warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

print("\n" + "="*80)
print(f"{'STRESS TEST: COVID CRASH & RECOVERY (FEB - AUG 2020)':^80}")
print("="*80)

# 1. IDENTIFY STRATEGIES
portfolios = {}

# Strategy 1: Smart Long-Only
if 'w_smart_lo' in locals() and w_smart_lo is not None:
    portfolios['Strategy 1 (Smart Growth)'] = w_smart_lo
elif 'w_case_lo' in locals() and w_case_lo is not None:
    portfolios['Strategy 1 (Base Growth)'] = w_case_lo

# Strategy 2: Smart Market Neutral
if 'w_smart_mn' in locals() and w_smart_mn is not None:
    portfolios['Strategy 2 (Smart Hedge)'] = w_smart_mn
elif 'w_case_mn' in locals() and w_case_mn is not None:
    portfolios['Strategy 2 (Base Hedge)'] = w_case_mn

if not portfolios:
    print("❌ No portfolios found. Please run Block 19 or 21 first.")
else:
    # 2. SETUP SCENARIO (6 MONTHS)
    stress_start = "2020-02-01" 
    stress_end   = "2020-08-01"

    all_tickers = set()
    for w in portfolios.values():
        all_tickers.update(w.index)
    all_tickers = list(all_tickers)

    print(f"Simulating {len(all_tickers)} assets over the Covid Crisis...")
    print(f"Period: {stress_start} to {stress_end}")

    try:
        # 3. DOWNLOAD HISTORICAL DATA
        data_stress = yf.download(
            all_tickers + ["^GSPC"], 
            start=stress_start, 
            end=stress_end, 
            progress=False, 
            auto_adjust=True
        )
        
        if isinstance(data_stress.columns, pd.MultiIndex):
            prices_stress = data_stress.xs('Close', axis=1, level=0)
        else:
            prices_stress = data_stress['Close']

        # Cleanup & Align
        prices_stress.index = pd.to_datetime(prices_stress.index).tz_localize(None)
        
        # 4. HANDLE "NEW" STOCKS (IPOs after 2020)
        # If a stock didn't exist in 2020, we assume that allocation is held as CASH (0 return)
        prices_stress = prices_stress.dropna(axis=1, how='all') # Drop cols with NO data
        
        # Calculate Returns
        rets_stress = prices_stress.pct_change().fillna(0)

        # 5. BENCHMARK (S&P 500)
        if "^GSPC" in rets_stress.columns:
            sp500_daily = rets_stress["^GSPC"]
            sp500_curve = 10000 * (1 + sp500_daily).cumprod()
            
            # Stats
            sp500_peak = sp500_curve.cummax()
            sp500_dd_series = (sp500_curve - sp500_peak) / sp500_peak
            sp500_max_dd = sp500_dd_series.min()
            sp500_total = (sp500_curve.iloc[-1] / 10000) - 1
        else:
            sp500_curve = pd.Series(10000, index=rets_stress.index)
            sp500_max_dd = 0
            sp500_total = 0

        # 6. SIMULATE STRATEGIES
        results_dd = {}
        results_ret = {}
        results_curve = {}
        results_underwater = {}
        
        # VISUALIZATION SETUP (Dual Panel)
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), gridspec_kw={'height_ratios': [2, 1]}, sharex=True)
        
        # Plot Benchmark
        ax1.plot(sp500_curve, color='black', linestyle='--', linewidth=2, label=f'S&P 500 (DD: {sp500_max_dd:.1%})', alpha=0.5)
        ax2.plot(sp500_dd_series, color='black', linestyle='--', linewidth=1, alpha=0.5)
        ax2.fill_between(sp500_dd_series.index, sp500_dd_series, 0, color='gray', alpha=0.2)

        for name, weights in portfolios.items():
            # Filter for assets that existed in 2020
            valid_tickers = [t for t in weights.index if t in rets_stress.columns]
            missing_tickers = [t for t in weights.index if t not in rets_stress.columns]
            
            if len(missing_tickers) > 0:
                print(f"   ℹ️ {name}: {len(missing_tickers)} assets excluded (didn't exist in 2020). Treated as Cash.")
            
            if not valid_tickers:
                continue
                
            # Align Weights: Missing assets automatically become 0 (Cash) because we reindex
            aligned_w = weights.reindex(rets_stress.columns).fillna(0)
            
            # Portfolio Returns
            port_daily = rets_stress.dot(aligned_w)
            
            # Add Interest for Hedged Strategies (Cash Collateral Yield ~0.5% in 2020)
            if "Hedge" in name or "Neutral" in name:
                port_daily += (0.005/252) # Lower rates in 2020 than today

            port_curve = 10000 * (1 + port_daily).cumprod()
            
            # Calculate Drawdown
            port_peak = port_curve.cummax()
            port_dd_series = (port_curve - port_peak) / port_peak
            max_dd = port_dd_series.min()
            total_ret = (port_curve.iloc[-1] / 10000) - 1
            
            # Store Data
            results_dd[name] = max_dd
            results_ret[name] = total_ret
            results_curve[name] = port_curve
            
            # Plot Equity Curve
            color = '#00c853' if "Growth" in name else '#d50000'
            ax1.plot(port_curve, linewidth=2, label=f'{name}', color=color)
            
            # Plot Drawdown (Underwater)
            ax2.plot(port_dd_series, linewidth=1.5, color=color)
            ax2.fill_between(port_dd_series.index, port_dd_series, 0, color=color, alpha=0.1)

        # Formatting Panel 1 (Equity)
        ax1.set_title("Stress Test: Crash & Recovery (Feb-Aug 2020)", fontsize=14, fontweight='bold')
        ax1.set_ylabel("Portfolio Value (£10k Start)")
        ax1.legend()
        ax1.grid(True, alpha=0.3)
        
        # Formatting Panel 2 (Drawdown)
        ax2.set_title("The 'Pain' Chart: Drawdown % From Peak", fontsize=12, fontweight='bold')
        ax2.set_ylabel("Drawdown (%)")
        ax2.set_xlabel("Date")
        ax2.grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.show()

        # 7. REPORT CARD
        print("\n" + "="*80)
        print(f"{'CRASH TEST REPORT CARD':^80}")
        print("="*80)
        print(f"{'STRATEGY':<30} | {'MAX DRAWDOWN':<15} | {'RECOVERY (Return)'}")
        print("-" * 80)
        print(f"{'S&P 500 (Benchmark)':<30} | {sp500_max_dd:<15.2%} | {sp500_total:+.2%}")
        
        for name in results_dd:
            dd = results_dd[name]
            ret = results_ret[name]
            print(f"{name:<30} | {dd:<15.2%} | {ret:+.2%}")
        print("-" * 80)
        
    except Exception as e:
        print(f"⚠️ Stress Test Failed: {e}")

# --- CONCENTRATION & SAFETY CHECK ---
print("\n" + "="*80)
print(f"{'FINAL SAFETY AUDIT':^80}")
print("="*80)

def audit_safety(weights, name, max_dd, bench_dd):
    if weights is None: return
    
    # 1. Concentration
    abs_w = weights.abs().sort_values(ascending=False)
    top_5 = abs_w.head(5).sum()
    
    print(f"Strategy: {name}")
    print(f"   • Top 5 Concentration:  {top_5:.1%}")
    print(f"   • Crash Resistance:     {max_dd:.1%} (vs Mkt {bench_dd:.1%})")
    
    # 2. Logic Verdicts
    verdict = []
    
    # Drawdown Check
    if abs(max_dd) < abs(bench_dd) * 0.8:
        verdict.append("✅ SAFER than Market")
    elif abs(max_dd) > abs(bench_dd) * 1.1:
        verdict.append("⚠️ RISKIER than Market")
    else:
        verdict.append("ℹ️ MATCHES Market Risk")
        
    # Concentration Check
    if top_5 > 0.50:
        verdict.append("⚠️ HIGH Concentration")
    else:
        verdict.append("✅ GOOD Diversification")
        
    print(f"   👉 VERDICT: {', '.join(verdict)}")
    print("-" * 40)

# Run Audit
for name, w in portfolios.items():
    if name in results_dd:
        audit_safety(w, name, results_dd[name], sp500_max_dd)
================================================================================
              STRESS TEST: COVID CRASH & RECOVERY (FEB - AUG 2020)              
================================================================================
Simulating 25 assets over the Covid Crisis...
Period: 2020-02-01 to 2020-08-01
No description has been provided for this image
================================================================================
                             CRASH TEST REPORT CARD                             
================================================================================
STRATEGY                       | MAX DRAWDOWN    | RECOVERY (Return)
--------------------------------------------------------------------------------
S&P 500 (Benchmark)            | -33.92%         | +0.68%
Strategy 1 (Smart Growth)      | -28.02%         | +2.60%
Strategy 2 (Smart Hedge)       | -29.71%         | -26.99%
--------------------------------------------------------------------------------

================================================================================
                               FINAL SAFETY AUDIT                               
================================================================================
Strategy: Strategy 1 (Smart Growth)
   • Top 5 Concentration:  68.8%
   • Crash Resistance:     -28.0% (vs Mkt -33.9%)
   👉 VERDICT: ℹ️ MATCHES Market Risk, ⚠️ HIGH Concentration
----------------------------------------
Strategy: Strategy 2 (Smart Hedge)
   • Top 5 Concentration:  93.1%
   • Crash Resistance:     -29.7% (vs Mkt -33.9%)
   👉 VERDICT: ℹ️ MATCHES Market Risk, ⚠️ HIGH Concentration
----------------------------------------

Event Study — a classic quantitative method to measure Post-Earnings Announcement Drift (PEAD).¶

This answers a critical trading question:

"When my stocks beat earnings, does the price jump once and stop?" (Efficient Market)

"Or does it keep drifting up for weeks?" (PEAD - Profitable)

As noted by Bernard and Thomas (1989), Ball and Brown (1968) were the first to observe that even after earnings announcements, estimated cumulative abnormal returns continue to drift upward for good news firms and downward for bad news firms. Competing explanations for post-earnings-announcement drift (PEAD) fall into two categories. One class posits that at least part of the price response to new information is delayed; a second suggests that the capital asset pricing model (CAPM) used to calculate abnormal returns is either incomplete or misestimated, so researchers fail to fully adjust raw returns for risk. Can we then take advantage of this recurring event in the market to make a consistent profit? The following block will show the effect of earnings announcement with consequent POSITIVE or NEGATIVE SURPRICE on securities. I have analysed both the SHORT TERM EFFECT and the MEDIUM TERM EFFECT.

Bernard, V.L. and Thomas, J.K. (1989). Post-Earnings-Announcement Drift: Delayed Price Response or Risk Premium? Journal of Accounting Research, 27, pp.1–36. doi:https://doi.org/10.2307/2491062.

In [34]:
# --- BLOCK 30: EARNINGS EVENT STUDY (PURE S&P 500 UNIVERSE) ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import warnings

# Suppress warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

print("\n" + "="*80)
print(f"{'MARKET RESEARCH: EARNINGS DRIFT ON S&P 500 (SHORT vs QUARTERLY)':^80}")
print("="*80)

# 1. DEFINE TARGET UNIVERSE (PURE EQUITY LIST)
target_universe = []

# Priority 1: Use the clean 'sp500_tickers' list from Block 1
if 'sp500_tickers' in locals() and sp500_tickers:
    target_universe = sp500_tickers
    print(f"✅ Loaded Clean S&P 500 List: {len(target_universe)} Stocks")

# Priority 2: Filter 'prices_stocks' if the clean list is missing
elif 'prices_stocks' in locals() and not prices_stocks.empty:
    raw_list = prices_stocks.columns.tolist()
    # Explicitly remove non-equity assets
    exclude = ["BTC-USD", "ETH-USD", "TLT", "GLD", "SLV", "^TNX", "GBPUSD=X"]
    target_universe = [t for t in raw_list if t not in exclude]
    print(f"⚠️ 'sp500_tickers' not found. Filtered 'prices_stocks': {len(target_universe)} Assets")

else:
    print("⚠️ No Data Found. Using Top 50 Tech Fallback.")
    target_universe = ["AAPL", "MSFT", "GOOG", "AMZN", "NVDA", "TSLA", "META", "AMD", "NFLX", "INTC"]

print(f"🔎 Analyzing Earnings Reactions for {len(target_universe)} Stocks...")

# 2. BENCHMARK SETUP (EXTENDED HISTORY)
print("... Downloading Benchmark (SPY) Data ...")
try:
    spy = yf.download("SPY", start="2022-06-01", progress=False, auto_adjust=True)
    if isinstance(spy.columns, pd.MultiIndex):
        spy = spy.xs('Close', axis=1, level=0).iloc[:, 0]
    else:
        spy = spy['Close']
    
    spy_ret = np.log(spy / spy.shift(1)).fillna(0.0)
    spy_ret.index = pd.to_datetime(spy_ret.index).tz_localize(None)
except Exception as e:
    print(f"⚠️ Benchmark Download Failed: {e}")
    spy_ret = pd.Series(0, index=pd.date_range("2022-06-01", pd.Timestamp.now()))

# 3. INITIALIZE STORAGE
short_pos_curves = []
short_neg_curves = []
med_pos_curves = []
med_neg_curves = []

count_processed = 0
events_found = 0

print("-" * 80)
print(f"{'PROGRESS':<15} | {'TICKER':<10} | {'STATUS':<20} | {'EVENTS'}")
print("-" * 80)

# 4. MAIN LOOP
for ticker in target_universe:
    count_processed += 1
    
    # Progress Check (Print every 25 stocks)
    if count_processed % 25 != 0 and count_processed != len(target_universe):
        continue_print = False
    else:
        continue_print = True

    try:
        t = yf.Ticker(ticker)
        earn = t.earnings_dates
        
        if earn is None or earn.empty: 
            if continue_print: print(f"{count_processed}/{len(target_universe)} | {ticker:<10} | ⚪ No Data             | 0")
            continue

        # Clean Dates & Columns
        earn = earn.reset_index()
        date_col = earn.columns[0]
        earn[date_col] = pd.to_datetime(earn[date_col], utc=True).dt.tz_localize(None)
        
        rename_map = {}
        for c in earn.columns:
            c_str = str(c).lower()
            if 'reported' in c_str or 'actual' in c_str: rename_map[c] = 'Reported'
            if 'estimate' in c_str: rename_map[c] = 'Estimate'
        earn.rename(columns=rename_map, inplace=True)
        
        if 'Reported' not in earn.columns or 'Estimate' not in earn.columns: continue
        
        earn = earn.dropna(subset=['Reported', 'Estimate'])
        
        # Filter Events: 2023 onwards
        earn = earn[earn[date_col] >= pd.Timestamp("2023-01-01")]
        if earn.empty: continue

        # Calculate Surprise
        earn['Estimate'] = earn['Estimate'].replace(0, 0.01) 
        earn['Surprise'] = (earn['Reported'] - earn['Estimate']) / earn['Estimate'].abs()

        # Get Price History
        prices = t.history(start="2022-06-01", auto_adjust=True)['Close']
        if prices.empty: continue
        prices.index = pd.to_datetime(prices.index).tz_localize(None)
        returns = np.log(prices / prices.shift(1)).fillna(0.0)

        local_events = 0
        
        for idx, row in earn.iterrows():
            event_date = row[date_col]
            
            # Find Index Location
            if event_date not in returns.index:
                try:
                    loc_idx = returns.index.get_indexer([event_date], method='nearest')[0]
                except: continue
            else:
                loc_idx = returns.index.get_loc(event_date)
            
            # --- A. SHORT TERM ANALYSIS (-10 to +10) ---
            if loc_idx >= 10 and loc_idx + 10 < len(returns):
                r_slice = returns.iloc[loc_idx-10 : loc_idx+11]
                b_slice = spy_ret.reindex(r_slice.index).fillna(0.0)
                ar = r_slice - b_slice
                car = ar.cumsum()
                car = car - car.iloc[9] # Normalize to Day -1
                
                curve = pd.Series(car.values, index=range(-10, 11))
                
                if row['Surprise'] > 0.05: short_pos_curves.append(curve)
                elif row['Surprise'] < -0.05: short_neg_curves.append(curve)

            # --- B. MEDIUM TERM ANALYSIS (-63 to +63) ---
            if loc_idx >= 63 and loc_idx + 63 < len(returns):
                r_slice = returns.iloc[loc_idx-63 : loc_idx+64]
                b_slice = spy_ret.reindex(r_slice.index).fillna(0.0)
                ar = r_slice - b_slice
                car = ar.cumsum()
                car = car - car.iloc[63] # Normalize to Day 0
                
                curve = pd.Series(car.values, index=range(-63, 64))
                
                if row['Surprise'] > 0.05: med_pos_curves.append(curve)
                elif row['Surprise'] < -0.05: med_neg_curves.append(curve)
                
                local_events += 1
                events_found += 1

        if continue_print:
            print(f"{count_processed}/{len(target_universe)} | {ticker:<10} | ✅ Processed         | {local_events}")

    except Exception as e:
        continue

print("-" * 80)
print(f"Total Events Analyzed: {events_found}")

# 5. VISUALIZATION & STATISTICS

# --- A. SHORT TERM (PEAD) ---
if len(short_pos_curves) > 0 and len(short_neg_curves) > 0:
    avg_pos = pd.concat(short_pos_curves, axis=1).mean(axis=1)
    avg_neg = pd.concat(short_neg_curves, axis=1).mean(axis=1)

    # Stats
    jump_pos = avg_pos.loc[0]
    drift_pos = avg_pos.loc[10] - avg_pos.loc[0]
    total_pos = avg_pos.loc[10]

    jump_neg = avg_neg.loc[0]
    drift_neg = avg_neg.loc[10] - avg_neg.loc[0]
    total_neg = avg_neg.loc[10]

    plt.figure(figsize=(12, 6))
    plt.plot(avg_pos, color='#00c853', linewidth=3, label='Positive Surprise (>5%)')
    plt.plot(avg_neg, color='#d50000', linewidth=3, label='Negative Surprise (<-5%)')
    plt.axvline(0, color='black', linestyle='--', linewidth=1)
    plt.axhline(0, color='gray', linewidth=0.5)
    plt.title(f"Market-Wide Short-Term Drift (N={len(short_pos_curves)} Events)", fontsize=14, fontweight='bold')
    plt.xlabel("Days Relative to Earnings")
    plt.ylabel("Cumulative Abnormal Return")
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()

    print("\n" + "="*60)
    print(f"{'SHORT TERM STATS (10 DAYS) - S&P 500':^60}")
    print("="*60)
    print(f"{'METRIC':<25} | {'WINNERS (Beats)':<15} | {'LOSERS (Misses)'}")
    print("-" * 60)
    print(f"{'Initial Pop (Day 0)':<25} | {jump_pos:+.2%}          | {jump_neg:+.2%}")
    print(f"{'Post-Event Drift':<25} | {drift_pos:+.2%}          | {drift_neg:+.2%}")
    print(f"{'Total 10-Day Return':<25} | {total_pos:+.2%}          | {total_neg:+.2%}")
    
    if drift_pos > 0.005: 
        print("\n✅ INSIGHT: The Market SHOWS PEAD. Holding winners pays off.")
    else:
        print("\n⚠️ INSIGHT: The Market is EFFICIENT. Gains happen instantly. Don't chase.")

# --- B. MEDIUM TERM (3 MONTHS) ---
if len(med_pos_curves) > 0 and len(med_neg_curves) > 0:
    avg_pos_med = pd.concat(med_pos_curves, axis=1).mean(axis=1)
    avg_neg_med = pd.concat(med_neg_curves, axis=1).mean(axis=1)

    # Stats
    qtr_return_pos = avg_pos_med.iloc[-1]
    qtr_return_neg = avg_neg_med.iloc[-1]

    plt.figure(figsize=(12, 6))
    plt.plot(avg_pos_med, color='#00c853', linewidth=2, label='Beat Earnings (>5%)')
    plt.plot(avg_neg_med, color='#d50000', linewidth=2, label='Missed Earnings (<-5%)')
    plt.axvline(0, color='black', linestyle='--', linewidth=1)
    plt.axhline(0, color='gray', linewidth=0.5)
    plt.title(f"Market-Wide Quarterly Trend (N={len(med_pos_curves)} Events)", fontsize=14, fontweight='bold')
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    print("\n" + "="*60)
    print(f"{'MEDIUM TERM STATS (3 MONTHS) - S&P 500':^60}")
    print("="*60)
    print(f"{'METRIC':<25} | {'WINNERS':<15} | {'LOSERS'}")
    print("-" * 60)
    print(f"{'Total 3-Mo Return':<25} | {qtr_return_pos:+.2%}          | {qtr_return_neg:+.2%}")
    
    print("-" * 60)
    if qtr_return_pos > total_pos:
        print("✅ INSIGHT: Earnings beats drive trends that last the entire quarter.")
    else:
        print("⚠️ INSIGHT: Earnings beats are short-lived. The market Mean Reverts.")
else:
    print("⚠️ Not enough history to generate the 3-month chart.")
================================================================================
        MARKET RESEARCH: EARNINGS DRIFT ON S&P 500 (SHORT vs QUARTERLY)         
================================================================================
✅ Loaded Clean S&P 500 List: 503 Stocks
🔎 Analyzing Earnings Reactions for 503 Stocks...
... Downloading Benchmark (SPY) Data ...
--------------------------------------------------------------------------------
PROGRESS        | TICKER     | STATUS               | EVENTS
--------------------------------------------------------------------------------
25/503 | AEE        | ✅ Processed         | 11
50/503 | ATO        | ✅ Processed         | 11
75/503 | BR         | ✅ Processed         | 11
100/503 | CHTR       | ✅ Processed         | 11
125/503 | COO        | ✅ Processed         | 11
150/503 | FANG       | ✅ Processed         | 11
175/503 | EQT        | ✅ Processed         | 11
200/503 | FE         | ✅ Processed         | 11
225/503 | HAL        | ✅ Processed         | 11
250/503 | IR         | ✅ Processed         | 11
275/503 | KIM        | ✅ Processed         | 11
300/503 | MRSH       | ✅ Processed         | 12
325/503 | MS         | ✅ Processed         | 12
350/503 | ODFL       | ✅ Processed         | 11
375/503 | PPL        | ✅ Processed         | 11
400/503 | ROK        | ✅ Processed         | 11
425/503 | STLD       | ✅ Processed         | 11
450/503 | TT         | ✅ Processed         | 11
475/503 | VICI       | ✅ Processed         | 11
500/503 | YUM        | ✅ Processed         | 11
503/503 | ZTS        | ✅ Processed         | 11
--------------------------------------------------------------------------------
Total Events Analyzed: 5487
No description has been provided for this image
============================================================
            SHORT TERM STATS (10 DAYS) - S&P 500            
============================================================
METRIC                    | WINNERS (Beats) | LOSERS (Misses)
------------------------------------------------------------
Initial Pop (Day 0)       | +1.17%          | -2.46%
Post-Event Drift          | -0.35%          | -0.16%
Total 10-Day Return       | +0.83%          | -2.62%

⚠️ INSIGHT: The Market is EFFICIENT. Gains happen instantly. Don't chase.
No description has been provided for this image
============================================================
           MEDIUM TERM STATS (3 MONTHS) - S&P 500           
============================================================
METRIC                    | WINNERS         | LOSERS
------------------------------------------------------------
Total 3-Mo Return         | -1.56%          | -2.81%
------------------------------------------------------------
⚠️ INSIGHT: Earnings beats are short-lived. The market Mean Reverts.

Let's now see the strategy in action. We buy those stocks that have a earning surprise bigger than 2%, we hold them for 10 days then sell. Each trade size is £100.

In [35]:
# --- BLOCK 31: EARNINGS MOMENTUM BACKTEST (FULL MARKET UNIVERSE) ---
import seaborn as sns

# Suppress warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

print("\n" + "="*80)
print(f"{'STRATEGY OVERLAY: EARNINGS MOMENTUM BACKTEST (FULL MARKET)':^80}")
print("="*80)

# 1. STRATEGY PARAMETERS
HOLDING_PERIOD = 10       # Hold for 10 trading days (approx 2 weeks)
SURPRISE_THRESHOLD = 0.02 # 2% Beat/Miss required to trigger trade
ALLOCATION = 100          # Notional trade size (for PnL calc)

# 2. DEFINE UNIVERSE (PURE EQUITY LIST)
target_universe = []
source = "None"

# Priority 1: Use the clean 'sp500_tickers' list from Block 1
if 'sp500_tickers' in locals() and sp500_tickers:
    target_universe = sp500_tickers
    source = "Block 1 (S&P 500 Scrape)"

# Priority 2: Filter 'prices_stocks' if the clean list is missing
elif 'prices_stocks' in locals() and not prices_stocks.empty:
    raw_list = prices_stocks.columns.tolist()
    # Explicitly remove non-equity assets (Crypto, Bonds, FX, Indices)
    exclude = ["BTC-USD", "ETH-USD", "TLT", "GLD", "SLV", "^TNX", "GBPUSD=X", "USDT-USD", "^GSPC"]
    target_universe = [t for t in raw_list if t not in exclude]
    source = "Block 1 (Price Matrix Filtered)"

# Priority 3: Fallback
else:
    target_universe = ["AAPL", "MSFT", "GOOG", "AMZN", "NVDA", "TSLA", "META", "AMD", "JPM"]
    source = "Fallback List"

print(f"📥 Source: {source}")
print(f"🔎 Simulating Trades on {len(target_universe)} Assets (2023-Present)...")
print("   (This may take a moment due to data volume)")

trade_log = []
count_processed = 0

# 3. FAST SIMULATION ENGINE
for ticker in target_universe:
    count_processed += 1
    # Simple progress indicator for large lists
    if len(target_universe) > 50 and count_processed % 50 == 0:
        print(f"   ... Processed {count_processed}/{len(target_universe)} tickers ...")

    try:
        # A. Fetch Data (Earnings & Price)
        t = yf.Ticker(ticker)
        
        # Get Earnings
        earn = t.earnings_dates
        if earn is None or earn.empty: continue
        
        # Get Prices (Need 'Open' for realistic entry)
        prices = t.history(start="2023-01-01", auto_adjust=True)
        if prices.empty: continue
        
        # B. Align Dates & Timezones (Critical Optimization)
        prices.index = pd.to_datetime(prices.index).tz_localize(None)
        
        earn = earn.reset_index()
        earn.rename(columns={earn.columns[0]: 'EventDate'}, inplace=True)
        earn['EventDate'] = pd.to_datetime(earn['EventDate'], utc=True).dt.tz_localize(None)
        
        # Robust Column Mapping
        rename_map = {}
        for c in earn.columns:
            if 'reported' in str(c).lower(): rename_map[c] = 'Reported'
            if 'estimate' in str(c).lower(): rename_map[c] = 'Estimate'
        earn.rename(columns=rename_map, inplace=True)
        
        if 'Reported' not in earn.columns: continue
        
        # C. Process Events
        for idx, row in earn.iterrows():
            event_date = row['EventDate']
            
            # Filter Timeframe
            if event_date.year < 2023 or event_date > pd.Timestamp.now(): continue
            
            # Calc Surprise
            est = row['Estimate']
            act = row['Reported']
            if pd.isna(est) or pd.isna(act) or est == 0: continue
            
            surprise = (act - est) / abs(est)
            
            # --- SIGNAL ---
            direction = "FLAT"
            if surprise > SURPRISE_THRESHOLD: direction = "LONG"
            elif surprise < -SURPRISE_THRESHOLD: direction = "SHORT"
            else: continue 
            
            # --- EXECUTION (Vectorized Lookup) ---
            # Find first trading day AFTER event
            future_prices = prices.loc[prices.index > event_date]
            if future_prices.empty: continue
            
            entry_date = future_prices.index[0]
            
            # Entry at OPEN (More realistic than Close)
            entry_price = future_prices.loc[entry_date, 'Open']
            
            # Exit at CLOSE after Holding Period
            idx_loc = prices.index.get_loc(entry_date)
            if idx_loc + HOLDING_PERIOD >= len(prices): continue 
            
            exit_date = prices.index[idx_loc + HOLDING_PERIOD]
            exit_price = prices.loc[exit_date, 'Close']
            
            # --- METRICS ---
            roi = 0.0
            pnl = 0.0
            
            if direction == "LONG":
                roi = (exit_price - entry_price) / entry_price
                pnl = roi * ALLOCATION
            else: # SHORT
                roi = (entry_price - exit_price) / entry_price
                pnl = roi * ALLOCATION
            
            trade_log.append({
                'Ticker': ticker,
                'Type': direction,
                'Date': entry_date,
                'Surprise': surprise,
                'ROI_Pct': roi,
                'PnL': pnl
            })

    except Exception as e:
        continue

# 4. ADVANCED REPORTING
if len(trade_log) > 0:
    df = pd.DataFrame(trade_log)
    df = df.sort_values("Date")
    df['Cumulative_PnL'] = df['PnL'].cumsum()
    
    # Key Metrics
    total_trades = len(df)
    win_rate = len(df[df['PnL'] > 0]) / total_trades
    avg_roi = df['ROI_Pct'].mean()
    median_roi = df['ROI_Pct'].median()
    total_profit = df['PnL'].sum()
    
    print("\n" + "="*80)
    print(f"{'📊 MARKET-WIDE STRATEGY REPORT 📊':^80}")
    print("="*80)
    print(f"Total Trades:           {total_trades}")
    print(f"Win Rate:               {win_rate:.1%}")
    print(f"Avg Return per Trade:   {avg_roi:+.2%}  <-- EDGE VERIFICATION")
    print(f"Median Return:          {median_roi:+.2%}")
    print(f"Total Profit (on £{ALLOCATION}): £{total_profit:.2f}")
    print("-" * 80)
    
    # 5. VISUALIZATION (DUAL PANEL)
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), gridspec_kw={'height_ratios': [2, 1]})
    
    # Panel 1: Equity Curve
    ax1.plot(df['Date'], df['Cumulative_PnL'], color='#2962ff', linewidth=2, label='Strategy Equity')
    ax1.fill_between(df['Date'], df['Cumulative_PnL'], 0, color='#2962ff', alpha=0.1)
    ax1.set_title(f"Cumulative Profit Growth (Market-Wide Backtest)", fontsize=12, fontweight='bold')
    ax1.set_ylabel("Net Profit (£)")
    ax1.grid(True, alpha=0.3)
    ax1.legend(loc="upper left")
    
    # Panel 2: Return Distribution (Histogram)
    sns.histplot(df['ROI_Pct'] * 100, bins=30, kde=True, ax=ax2, color='#00c853', edgecolor='black')
    ax2.axvline(0, color='red', linestyle='--', linewidth=1.5)
    ax2.set_title(f"Trade Return Distribution (Avg: {avg_roi:.2%})", fontsize=12, fontweight='bold')
    ax2.set_xlabel("Return per Trade (%)")
    ax2.set_ylabel("Frequency")
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    # Top Trades
    print("\n🏆 HALL OF FAME (Top 5 Wins):")
    best = df.sort_values("ROI_Pct", ascending=False).head(5)
    print(best[['Ticker', 'Type', 'Date', 'Surprise', 'ROI_Pct', 'PnL']].to_string(index=False, formatters={'ROI_Pct': '{:.2%}'.format, 'PnL': '£{:.2f}'.format, 'Surprise': '{:.2%}'.format}))

else:
    print("⚠️ No trades generated. Check if stocks had earnings surprises > 2%.")
================================================================================
           STRATEGY OVERLAY: EARNINGS MOMENTUM BACKTEST (FULL MARKET)           
================================================================================
📥 Source: Block 1 (S&P 500 Scrape)
🔎 Simulating Trades on 503 Assets (2023-Present)...
   (This may take a moment due to data volume)
   ... Processed 50/503 tickers ...
   ... Processed 100/503 tickers ...
   ... Processed 150/503 tickers ...
   ... Processed 200/503 tickers ...
   ... Processed 250/503 tickers ...
   ... Processed 300/503 tickers ...
   ... Processed 350/503 tickers ...
   ... Processed 400/503 tickers ...
   ... Processed 500/503 tickers ...

================================================================================
                        📊 MARKET-WIDE STRATEGY REPORT 📊                         
================================================================================
Total Trades:           3950
Win Rate:               52.4%
Avg Return per Trade:   +0.40%  <-- EDGE VERIFICATION
Median Return:          +0.37%
Total Profit (on £100): £1577.41
--------------------------------------------------------------------------------
No description has been provided for this image
🏆 HALL OF FAME (Top 5 Wins):
Ticker Type       Date Surprise ROI_Pct    PnL
   APP LONG 2024-11-07   34.41%  38.37% £38.37
  PLTR LONG 2023-05-09   25.00%  37.09% £37.09
  ERIE LONG 2023-07-28   19.37%  34.18% £34.18
  PLTR LONG 2024-11-05   50.00%  31.59% £31.59
   XYZ LONG 2024-11-08   28.57%  28.33% £28.33

Post-earnings announcement drift (PEAD), also called post-earnings momentum, captures a market anomaly where stocks drift in the direction of an earnings surprise for weeks or months after the announcement. A win rate that exceeds the 50% mark over sufficient trades indicates a statistical edge, potentially profitable with favorable risk-reward ratios despite occasional losses. High-frequency trading (HFT) firms exploit this effectively due to rapid execution, while manual traders face challenges from holding periods and transaction costs.

############################################################################################################################## This project represents my own analysis. Where standard algorithms or code snippets from public documentation (e.g., Pandas documentation, StackOverflow) were used, they have been adapted to fit this specific dataset.

2026 PIERPAOLO MIRIZI. ALL RIGHT RESERVED

In [ ]: